<a href="https://colab.research.google.com/github/Abdulaziz-Abdullah-Hazazi/Data-analysis-for-electric-vehicles/blob/main/Copy_of_mount_tuwait.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Part 1 Data Exploration and Preprocessing

- Data preprocessing tasks such as handling missing values, encoding categorical variables, and scaling numerical features.

In [None]:
import pandas as pd
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.base import BaseEstimator, TransformerMixin


# Lift the display limit on columns, then read the training data
pd.set_option('display.max_columns', None)
df = pd.read_csv(filepath_or_buffer="train.csv")

# Custom transformer that expands the program date columns into numeric parts
class DateTransformer(BaseEstimator, TransformerMixin):
    """Replace the program start/end date columns with year, month and day columns.

    The transformer is stateless: ``fit`` learns nothing and ``transform``
    only derives new columns from 'Program Start Date' and 'Program End Date'.
    """

    def fit(self, X, y=None):
        """Return the transformer unchanged; there is nothing to fit."""
        return self

    def transform(self, X):
        """Return a copy of ``X`` with the two date columns expanded.

        Parameters
        ----------
        X : pandas.DataFrame
            Must contain 'Program Start Date' and 'Program End Date' in a
            format ``pd.to_datetime`` can parse.

        Returns
        -------
        pandas.DataFrame
            ``X`` without the two date columns and with 'Start_year',
            'Start_month', 'Start_day', 'End_year', 'End_month' and 'End_day'
            appended. The input frame itself is left untouched.
        """
        # Work on a copy so the caller's DataFrame is not modified as a side effect
        X = X.copy()
        for source_col, prefix in (('Program Start Date', 'Start'),
                                   ('Program End Date', 'End')):
            dates = pd.to_datetime(X[source_col])
            X[prefix + '_year'] = dates.dt.year
            X[prefix + '_month'] = dates.dt.month
            X[prefix + '_day'] = dates.dt.day
        return X.drop(columns=['Program Start Date', 'Program End Date'])

# Define columns by data type
categorical_cols = ["Gender", "Home Region", "Home City", "Program ID", "Technology Type",
                    "Program Skill Level", "Program Presentation Method", "Completed Degree",
                    "Level of Education", "Education Speaciality", "College", "Employment Status",
                    "Job Type", "Still Working"]
numerical_cols = ["Age", "Program Days","University Degree Score", "University Degree Score System"]
# Year/month/day columns created by DateTransformer. They must be listed explicitly:
# ColumnTransformer drops every column it is not told about (remainder="drop" by default),
# which would silently throw the date features away.
date_feature_cols = ["Start_year", "Start_month", "Start_day",
                     "End_year", "End_month", "End_day"]

# Pipeline for numerical features
numerical_pipeline = Pipeline([
    ("imputer", SimpleImputer(strategy="mean")),  # Handle missing values by replacing with mean
    ("scaler", StandardScaler())  # Scale numerical features
])

# Pipeline for categorical features
categorical_pipeline = Pipeline([
    ("imputer", SimpleImputer(strategy="most_frequent")),  # Handle missing values by replacing with mode
    # Ignore categories that were not seen during fit instead of raising at transform time
    ("encoder", OneHotEncoder(handle_unknown="ignore"))
])

# ColumnTransformer to apply different pipelines to different columns
preprocessor = ColumnTransformer([
    ("num", numerical_pipeline, numerical_cols + date_feature_cols),
    ("cat", categorical_pipeline, categorical_cols)
])

# Full pipeline including preprocessor and date transformer
pipeline = Pipeline([
    ("date_transformer", DateTransformer()),
    ("preprocessor", preprocessor)
])

# Separate features and target variable
X = df.drop(columns=["Y"])  # Features
y = df["Y"]  # Target variable

# Apply the full pipeline to the features
X_processed = pipeline.fit_transform(X)

# Display missing values and basic statistics
missing_values = df.isnull().sum()
print("Missing Values:")
print(missing_values)
df.info()
# Only the last expression of a cell is rendered, so the preview needs an explicit display()
display(df.head())
df.describe()


Missing Values:
Student ID                           0
Age                                 92
Gender                               0
Home Region                          2
Home City                            2
Program ID                           0
Program Main Category Code           0
Program Sub Category Code          935
Technology Type                   2982
Program Skill Level               1646
Program Presentation Method          0
Program Start Date                   0
Program End Date                     0
Program Days                         0
Completed Degree                     0
Level of Education                  26
Education Speaciality              277
College                           3890
University Degree Score             81
University Degree Score System      81
Employment Status                  566
Job Type                          4567
Still Working                     4567
Y                                    0
dtype: int64
<class 'pandas.core.frame.DataFrame

Unnamed: 0,Age,Program Days,University Degree Score,University Degree Score System,Y
count,6456.0,6548.0,6467.0,6467.0,6548.0
mean,26.831165,19.691662,8.224432,9.773929,0.158674
std,5.535967,32.112061,19.120384,21.259962,0.3654
min,18.0,3.0,0.0,4.0,0.0
25%,23.0,5.0,3.3,5.0,0.0
50%,25.0,12.0,4.0,5.0,0.0
75%,29.0,19.0,4.51,5.0,0.0
max,57.0,292.0,100.0,100.0,1.0


Split categorical and numerical columns

In [None]:
# Candidate feature columns: everything except 'Student ID' and 'Y'
all_cols = df.columns.difference(['Student ID', 'Y'])

# Object-typed columns count as categorical; every other column as numerical
categorical_cols = []
numerical_cols = []
for col in all_cols:
    if df[col].dtype == 'object':
        categorical_cols.append(col)
    else:
        numerical_cols.append(col)

print("Categorical columns:", categorical_cols, sep="\n")
print("\nNumerical columns:", numerical_cols, sep="\n")


Categorical columns:
['College', 'Completed Degree', 'Education Speaciality', 'Employment Status', 'Gender', 'Home City', 'Home Region', 'Job Type', 'Level of Education', 'Program End Date', 'Program ID', 'Program Main Category Code', 'Program Presentation Method', 'Program Skill Level', 'Program Start Date', 'Program Sub Category Code', 'Still Working', 'Technology Type']

Numerical columns:
['Age', 'Program Days', 'University Degree Score', 'University Degree Score System']


Scaling

In [None]:
# Number of missing values in each numerical column
df[numerical_cols].isnull().sum()

Age                               92
Program Days                       0
University Degree Score           81
University Degree Score System    81
dtype: int64

In [None]:
from sklearn.impute import SimpleImputer

# Define columns to impute
cols_to_impute = ['Age', 'University Degree Score', 'University Degree Score System']

# 'University Degree Score System' gets the fixed value 5 rather than the column mean,
# so it has to be kept out of the mean imputer: once the imputer has filled a column,
# a later fillna(5) no longer finds any NaN to replace.
score_system_col = 'University Degree Score System'
mean_impute_cols = [name for name in cols_to_impute if name != score_system_col]

# Impute NaN values with the mean for the remaining numerical columns
imputer = SimpleImputer(strategy='mean')
df[mean_impute_cols] = imputer.fit_transform(df[mean_impute_cols])

# Fill specific value 5 for 'University Degree Score System' column.
# Assign the result back: df[col].fillna(..., inplace=True) operates on an
# intermediate object and stops working in pandas 3.0.
df[score_system_col] = df[score_system_col].fillna(5)

# Display the updated dataframe
print(df[numerical_cols])


       Age  Program Days  University Degree Score  \
0     37.0            12                     2.44   
1     21.0             5                     5.00   
2     24.0            54                     3.50   
3     23.0            33                     3.55   
4     23.0            54                     4.00   
...    ...           ...                      ...   
6543  31.0            66                     4.40   
6544  27.0            12                     4.46   
6545  24.0             5                     4.93   
6546  25.0            12                     4.00   
6547  37.0             5                     4.32   

      University Degree Score System  
0                                4.0  
1                                5.0  
2                                5.0  
3                                5.0  
4                                5.0  
...                              ...  
6543                             5.0  
6544                             5.0  
6545        

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['University Degree Score System'].fillna(5, inplace=True)


Part 2: Scaling numerical features

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Columns that will be rescaled
numerical_cols = ['Age', 'University Degree Score', 'University Degree Score System']

# Fit a MinMaxScaler on those columns, then overwrite them with the scaled values
scaler = MinMaxScaler()
scaler.fit(df[numerical_cols])
df[numerical_cols] = scaler.transform(df[numerical_cols])

# Show the scaled columns
print(df[numerical_cols])


           Age  University Degree Score  University Degree Score System
0     0.487179                   0.0244                        0.000000
1     0.076923                   0.0500                        0.010417
2     0.153846                   0.0350                        0.010417
3     0.128205                   0.0355                        0.010417
4     0.128205                   0.0400                        0.010417
...        ...                      ...                             ...
6543  0.333333                   0.0440                        0.010417
6544  0.230769                   0.0446                        0.010417
6545  0.153846                   0.0493                        0.010417
6546  0.179487                   0.0400                        0.000000
6547  0.487179                   0.0432                        0.010417

[6548 rows x 3 columns]


In [None]:
# Number of missing values in each categorical column
df[categorical_cols].isnull().sum()

College                        3890
Completed Degree                  0
Education Speaciality           277
Employment Status               566
Gender                            0
Home City                         2
Home Region                       2
Job Type                       4567
Level of Education               26
Program End Date                  0
Program ID                        0
Program Main Category Code        0
Program Presentation Method       0
Program Skill Level            1646
Program Start Date                0
Program Sub Category Code       935
Still Working                  4567
Technology Type                2982
dtype: int64

In [None]:
# Map every categorical column to the distinct non-null values it contains
unique_values_dict = {col: df[col].dropna().unique() for col in categorical_cols}

# Report how many distinct values each column has
print("Unique values for each column:\n")
for col, unique_values in unique_values_dict.items():
    print(col + ':', len(unique_values))

# Show the values themselves; columns with more than 15 distinct values are cut to the first 5
print("\n\nExamples\n")
for col, unique_values in unique_values_dict.items():
    preview = unique_values[:5] if len(unique_values) > 15 else unique_values
    print(col + ':', preview)


Unique values for each column:

College: 9
Completed Degree: 2
Education Speaciality: 871
Employment Status: 6
Gender: 2
Home City: 92
Home Region: 13
Job Type: 4
Level of Education: 5
Program End Date: 88
Program ID: 223
Program Main Category Code: 10
Program Presentation Method: 2
Program Skill Level: 3
Program Start Date: 77
Program Sub Category Code: 11
Still Working: 2
Technology Type: 3


Examples

College: ['الفنون والعلوم الإنسانية' 'تكنولوجيا الاتصالات والمعلومات'
 'العلوم الاجتماعية والصحافة والإعلام'
 'العلوم الطبيعية والرياضيات والإحصاء' 'الهندسة والتصنيع والبناء'
 'الأعمال والإدارة والقانون' 'التعليم' 'الصحة والرفاة'
 'البرامج والمؤهلات العامة']
Completed Degree: ['نعم' 'لا']
Education Speaciality: ['هندسة حاسب الالي' 'الإذاعة والتلفزيون والفيلم' 'Information Technology'
 'حوسبة تطبيقية - (مسار شبكات الحاسب)' 'نظم المعلومات الحاسوبية']
Employment Status: ['غير موظف' 'طالب' 'موظف' 'خريج' 'موظف - طالب' 'عمل حر']
Gender: ['ذكر' 'أنثى']
Home City: ['الرياض' 'خميس مشيط' 'حفر ال

No numerical columns

In [None]:
# Columns that are label-encoded in the cells below
For_label_enc_col = [
    'Home Region',
    'Home City',
    'Program Main Category Code',
    'Program Sub Category Code',
    'College',
]
df[For_label_enc_col]

Unnamed: 0,Home Region,Home City,Program Main Category Code,Program Sub Category Code,College
0,منطقة الرياض,الرياض,PCRF,PCRF,
1,منطقة عسير,خميس مشيط,APMR,SWPS,الفنون والعلوم الإنسانية
2,منطقة الرياض,الرياض,APMR,,
3,منطقة الرياض,الرياض,TOSL,TOSL,
4,منطقة الرياض,الرياض,CAUF,SWPS,تكنولوجيا الاتصالات والمعلومات
...,...,...,...,...,...
6543,منطقة الرياض,الرياض,CAUF,SWPS,تكنولوجيا الاتصالات والمعلومات
6544,منطقة القصيم,بريدة,PCRF,PCRF,
6545,منطقة الرياض,الرياض,PCRF,PCRF,تكنولوجيا الاتصالات والمعلومات
6546,منطقة الرياض,الرياض,PCRF,PCRF,تكنولوجيا الاتصالات والمعلومات


Handle nulls

In [None]:
# Fill nulls: take the next valid value in the column (backward fill) and, for gaps
# left at the very end, the previous valid value (forward fill).
# The result is assigned back to the frame because df[col].fillna(..., inplace=True)
# acts on an intermediate object and stops working in pandas 3.0; bfill()/ffill()
# also replace the deprecated fillna(method=...) spelling.
for col in For_label_enc_col:
    df[col] = df[col].bfill().ffill()

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df[col].fillna(method='bfill', inplace=True)
  df[col].fillna(method='bfill', inplace=True)
  df[col].fillna(method='ffill', inplace=True)


Encoding

In [None]:
from sklearn.preprocessing import LabelEncoder

# Fit one LabelEncoder per column and keep it in a dict keyed by column name
label_encoders = {}
for col in For_label_enc_col:
    encoder = LabelEncoder()
    label_encoders[col] = encoder
    # Replace the column's values with the encoder's integer codes
    df[col] = encoder.fit_transform(df[col])

df[For_label_enc_col]

Unnamed: 0,Home Region,Home City,Program Main Category Code,Program Sub Category Code,College
0,4,23,6,6,6
1,10,62,1,9,6
2,4,23,1,10,8
3,4,23,9,10,8
4,4,23,2,9,8
...,...,...,...,...,...
6543,4,23,2,9,8
6544,5,45,6,6,8
6545,4,23,6,6,8
6546,4,23,6,6,8


In [None]:
def _fill_gaps(series):
    """Return the column with missing values backward-filled, then forward-filled."""
    return series.bfill().ffill()


def _one_hot(series):
    """One-hot encode a column as integer indicator columns, dropping the first level."""
    return pd.get_dummies(series, drop_first=True, dtype=int)


# Fill missing values before encoding. Results are assigned back to the frame:
# df[col].fillna(..., inplace=True) acts on an intermediate object and stops
# working in pandas 3.0.
for col in ['Technology Type', 'Program Skill Level', 'Level of Education',
            'Employment Status', 'Job Type', 'Still Working']:
    df[col] = _fill_gaps(df[col])

# One-hot encode 'Gender' column
gender = _one_hot(df['Gender'])

# One-hot encode 'Technology Type' column
tech_type = _one_hot(df['Technology Type'])

# One-hot encode 'Program Skill Level' column
prog_skill = _one_hot(df['Program Skill Level'])

# One-hot encode 'Program Presentation Method' column
prog_pres = _one_hot(df['Program Presentation Method'])

# One-hot encode 'Completed Degree' column and rename columns
degree = _one_hot(df['Completed Degree']).rename(columns={'نعم': 'degree_completed'})

# Categorical encoding: keywords (Arabic and English, including common misspellings)
# that mark an education speciality as technical
tech_majors = ['حاسب',  'معلومات','تقنية', 'بيانات', 'cis', 'computer', 'اتصالات','it', 'شبكات','software','رقمي','network','mis','مواقع','cs',
            'برمجيات', 'امن سيبراني','اكترونيات', 'سيبراني', 'لكترونيات','ذكاء','data', 'information', 'technology','communication','حوسب','حاسب'
            ,'كمبيوتر','برمجه','gis','computing','conputer', 'لكتروني','artificial','artificial','artifical intelligence','صطناعي','سبراني','تكنولوجيا','ويب','وسائط',
            'برمجة','artifical','web','system','iot','programming','softeare','تطوير','multimedia','الحاشب']


def _speciality_group(value):
    """Label a speciality 'تقني' if its lower-cased text contains any keyword, else 'غير تقني'."""
    text = str(value).lower()  # missing values become the string 'nan' and match no keyword
    if any(word in text for word in tech_majors):
        return 'تقني'
    return 'غير تقني'


# Collapse the free-text speciality into the two groups in one pass
# (replaces a row-by-row loop that relied on chained .iloc assignment)
df['Education Speaciality'] = df['Education Speaciality'].map(_speciality_group)
edu_spe = _one_hot(df['Education Speaciality'])

# Categorize 'Level of Education': anything outside the known levels becomes 'غير معروف'
edu_lvl_class = ['البكالوريوس', 'الماجستير', 'ثانوي', 'الدكتوراه','غير معروف']
is_known_level = df['Level of Education'].map(
    lambda value: str(value).lower() in edu_lvl_class
).astype(bool)
df['Level of Education'] = df['Level of Education'].where(is_known_level, 'غير معروف')
# One-hot encode 'Level of Education' column
edu_lvl = _one_hot(df['Level of Education'])

# One-hot encode 'Employment Status' column
emp_stat = _one_hot(df['Employment Status'])

# One-hot encode 'Job Type' column
job_typ = _one_hot(df['Job Type'])

# One-hot encode 'Still Working' column and rename columns
still_work = _one_hot(df['Still Working']).rename(columns={'Yes': 'still_working'})


Concatenate all encoded columns

In [None]:
# Collect the one-hot encoded frames in their final column order
encoded_frames = [
    gender, tech_type, prog_skill, prog_pres, degree,
    edu_lvl, edu_spe, emp_stat, job_typ, still_work,
]

# Join them side by side into a single DataFrame
oneCool_col_df = pd.concat(encoded_frames, axis=1)

# Preview the first rows of the combined frame
oneCool_col_df.head()


Unnamed: 0,ذكر,داعمة,ناشئة,متقدم,متوسط,عن بعد,degree_completed,الدكتوراه,الماجستير,ثانوي,غير معروف,غير تقني,طالب,عمل حر,غير موظف,موظف,موظف - طالب,تطوع,دوام جزئي,دوام كامل,still_working
0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,1,1
1,1,0,0,0,1,0,1,0,0,0,0,1,1,0,0,0,0,0,0,1,1
2,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,0,0,0,1,1
3,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1
4,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,1


a. Split the preprocessed data into training and testing sets.

b. Implement machine learning models to predict student persistence and completion rates. We'll start with logistic regression and decision trees.

c. Evaluate the performance of each model using appropriate evaluation metrics such as accuracy, precision, recall, or F1-score.

Here's how you can do it in Python:

Finalizing preprocessing

In [None]:
y = df['Y']

# Final table: numerical columns, label-encoded columns, one-hot columns, then the target
processed_parts = [df[numerical_cols].copy(), df[For_label_enc_col], oneCool_col_df, y]
processed_df = pd.concat(processed_parts, axis=1)
processed_df

Unnamed: 0,Age,University Degree Score,University Degree Score System,Home Region,Home City,Program Main Category Code,Program Sub Category Code,College,ذكر,داعمة,ناشئة,متقدم,متوسط,عن بعد,degree_completed,الدكتوراه,الماجستير,ثانوي,غير معروف,غير تقني,طالب,عمل حر,غير موظف,موظف,موظف - طالب,تطوع,دوام جزئي,دوام كامل,still_working,Y
0,0.487179,0.0244,0.000000,4,23,6,6,6,1,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,1,1,0
1,0.076923,0.0500,0.010417,10,62,1,9,6,1,0,0,0,1,0,1,0,0,0,0,1,1,0,0,0,0,0,0,1,1,0
2,0.153846,0.0350,0.010417,4,23,1,10,8,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,0,0,0,1,1,0
3,0.128205,0.0355,0.010417,4,23,9,10,8,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0
4,0.128205,0.0400,0.010417,4,23,2,9,8,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6543,0.333333,0.0440,0.010417,4,23,2,9,8,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,1,1,0
6544,0.230769,0.0446,0.010417,5,45,6,6,8,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,1,0,0,0,1,1,0
6545,0.153846,0.0493,0.010417,4,23,6,6,8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,1,1
6546,0.179487,0.0400,0.000000,4,23,6,6,8,1,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0


In [None]:
# with scaling
# Correlation of every column with the target 'Y' (the target's own entry removed)
correlation_with_y = processed_df.corr()['Y'].drop('Y')
# Absolute correlations in ascending order
abs_corr_with_y_sorted = correlation_with_y.abs().sort_values()
# Keep the columns above the cut-off; threshold of 0.001 is arbitrary
above_threshold = abs_corr_with_y_sorted > 0.001
high_corr_cols = abs_corr_with_y_sorted[above_threshold].index
high_corr_cols

Index(['عمل حر', 'College', 'دوام كامل', 'داعمة', 'دوام جزئي', 'الدكتوراه',
       'غير موظف', 'موظف - طالب', 'Age', 'ناشئة', 'متقدم', 'الماجستير', 'ذكر',
       'تطوع', 'غير معروف', 'موظف', 'متوسط', 'Program Main Category Code',
       'Home Region', 'University Degree Score',
       'University Degree Score System', 'Program Sub Category Code',
       'Home City', 'طالب', 'ثانوي', 'غير تقني', 'عن بعد', 'degree_completed'],
      dtype='object')

Model building

In [None]:
import pandas as pd
from imblearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report
from sklearn.model_selection import cross_val_predict,train_test_split
# SMOTE is used in the pipeline below, so it has to be imported in this cell
from imblearn.over_sampling import SMOTE

# Assuming processed_df is already prepared
X = processed_df.drop('Y', axis=1)
y = processed_df['Y']

# Hold out the evaluation rows BEFORE any fitting. If the grid search is fitted on all
# rows, the refitted best estimator has already seen the test rows and the final
# report is too optimistic. stratify=y keeps the class ratio equal in both parts.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=123, stratify=y
)

# Define the pipeline. Using imblearn's Pipeline means SMOTE resamples only the
# training folds inside cross-validation, never the validation fold.
pipeline = Pipeline([
    ('smote', SMOTE(random_state=42)),  # SMOTE sampling
    ('classifier', RandomForestClassifier(random_state=42))  # Random Forest classifier, seeded for reproducibility
])

# Define parameters grid for the Random Forest Classifier
param_grid = {
    'classifier__max_depth': [10, 40, 50, 70],
    'classifier__min_samples_leaf': [4, 5, 8],
    'classifier__min_samples_split': [2, 5, 10],
    'classifier__n_estimators': [400, 1000, 1600, 1800, 2000]
}

# Initialize GridSearchCV
grid_search = GridSearchCV(
    estimator=pipeline,
    param_grid=param_grid,
    scoring='f1',
    cv=5,
    verbose=2,
    n_jobs=-1
)

# Fit the model on the training part only
grid_search.fit(X_train, y_train)

# Get the best parameters and score
best_params = grid_search.best_params_
best_score = grid_search.best_score_

# Print best parameters and score
print(best_params)
print(best_score)

# Use best estimator from GridSearchCV (refitted on the training part) for final evaluation
final_model = grid_search.best_estimator_

# Evaluate the model on the held-out rows
report = classification_report(y_test, final_model.predict(X_test))
print("Classification Report: ")
print(report)


In [None]:
import pandas as pd
# Lift the display limit on columns, then read test.csv
pd.set_option('display.max_columns', None)
test = pd.read_csv(filepath_or_buffer='test.csv')

In [None]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from category_encoders import OrdinalEncoder, OneHotEncoder

def processing(df):
    """Turn a raw program/student frame into an all-numeric feature frame.

    Steps, in order:
      1. Parse 'Program Start Date' / 'Program End Date' and add
         '<column>_year', '<column>_month' and '<column>_day' for each.
      2. Drop the two date columns and 'Program ID'.
      3. Fill missing values in every numeric column with that column's mean,
         then standardise the numeric columns with ``StandardScaler``.
      4. One-hot encode every object-typed column except 'Student ID'
         (missing values form their own 'missing' level, first level dropped).

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'Program Start Date', 'Program End Date' and 'Program ID'.
        The frame passed in is not modified.

    Returns
    -------
    pandas.DataFrame
        A new frame holding 'Student ID' (if present), the scaled numeric
        columns and the one-hot indicator columns.

    Notes
    -----
    The scaler and the dummy columns are derived from ``df`` itself, so two
    frames processed separately are not guaranteed to share columns or scale.
    NOTE(review): every numeric column is scaled, which would include a numeric
    target such as 'Y' if it is present -- pass features only, or confirm intent.
    """
    # Work on a copy so the caller's frame keeps its original columns and dtypes
    df = df.copy()

    # Convert date columns to datetime
    date_columns = ['Program Start Date', 'Program End Date']
    df[date_columns] = df[date_columns].apply(pd.to_datetime)

    # Extract date components
    for col in date_columns:
        df[col + '_year'] = df[col].dt.year
        df[col + '_month'] = df[col].dt.month
        df[col + '_day'] = df[col].dt.day

    # Drop unnecessary columns
    df = df.drop(columns=date_columns + ['Program ID'])

    # Impute missing values (column mean) and scale numerical columns
    numerical_cols = df.select_dtypes(include=['number']).columns
    df[numerical_cols] = df[numerical_cols].fillna(df[numerical_cols].mean())
    scaler = StandardScaler()
    df[numerical_cols] = scaler.fit_transform(df[numerical_cols])

    # Categorical columns to encode; 'Student ID' is an identifier, not a feature.
    # errors='ignore' keeps the function usable on frames without that column.
    categorical_cols = df.select_dtypes(include=['object']).columns
    categorical_cols = categorical_cols.drop('Student ID', errors='ignore')

    if len(categorical_cols) > 0:
        # Treat a missing value as its own category
        df[categorical_cols] = df[categorical_cols].fillna('missing')
        # One-hot encode with pandas. The earlier category_encoders chain passed
        # drop_first=True to its OneHotEncoder, which is not one of that encoder's
        # parameters, and fed it already ordinal-encoded integer columns.
        df = pd.get_dummies(df, columns=list(categorical_cols), drop_first=True, dtype=int)

    return df
# NOTE(review): this shows the training frame `df`; processing() is defined above but is
# not called in this cell -- confirm whether processing(test) was the intended output
df


Unnamed: 0,Student ID,Age,Gender,Home Region,Home City,Program ID,Program Main Category Code,Program Sub Category Code,Technology Type,Program Skill Level,Program Presentation Method,Program Start Date,Program End Date,Program Days,Completed Degree,Level of Education,Education Speaciality,College,University Degree Score,University Degree Score System,Employment Status,Job Type,Still Working,Y
0,4f14c50d-162e-4a15-9cf0-ec129c33bcf0,0.487179,ذكر,4,23,453686d8-4023-4506-b2df-fac8b059ac26,6,6,تقليدية,متوسط,حضوري,2023-05-28,2023-06-08,12,نعم,البكالوريوس,تقني,6,0.0244,0.000000,غير موظف,دوام كامل,Yes,0
1,0599d409-876b-41a5-af05-749ef0e77d32,0.076923,ذكر,10,62,cc8e4e42-65d5-4fa1-82f9-6c6c2d508b60,1,9,تقليدية,متوسط,حضوري,2023-04-02,2023-04-06,5,نعم,البكالوريوس,غير تقني,6,0.0500,0.010417,طالب,دوام كامل,Yes,0
2,38a11c0e-4afc-4261-9c64-e94cc0a272fb,0.153846,ذكر,4,23,e006900d-05a9-4c2b-a36f-0ffb9fce44cd,1,10,تقليدية,متوسط,حضوري,2023-07-23,2023-09-14,54,نعم,البكالوريوس,تقني,8,0.0350,0.010417,موظف,دوام كامل,Yes,0
3,1693e85b-f80e-40ce-846f-395ddcece6d3,0.128205,ذكر,4,23,2ec15f6b-233b-428a-b9f5-e40bc8d14cf9,9,10,تقليدية,متوسط,حضوري,2023-07-23,2023-08-24,33,نعم,البكالوريوس,تقني,8,0.0355,0.010417,خريج,دوام كامل,Yes,0
4,98a0e8d0-5f80-4634-afd8-322aa0902863,0.128205,ذكر,4,23,d32da0e9-1aed-48c3-992d-a22f9ccc741e,2,9,تقليدية,متوسط,حضوري,2023-04-30,2023-06-22,54,لا,البكالوريوس,تقني,8,0.0400,0.010417,موظف,دوام كامل,Yes,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6543,cd196579-9590-441b-8787-41078f3cee25,0.333333,أنثى,4,23,4f8c696a-b783-4d40-9776-105f6d3bd624,2,9,تقليدية,مبتدئ,حضوري,2023-04-02,2023-06-06,66,نعم,البكالوريوس,تقني,8,0.0440,0.010417,موظف,دوام كامل,Yes,0
6544,37bfc11c-ff8c-42dc-9cf9-0d13bb8f7131,0.230769,أنثى,5,45,e94942dd-8684-4746-97ae-df567b9b0a4a,6,6,تقليدية,مبتدئ,عن بعد,2023-05-14,2023-05-25,12,نعم,البكالوريوس,تقني,8,0.0446,0.010417,موظف,دوام كامل,Yes,0
6545,fc114302-a79f-439f-a08b-fe0a51cf839e,0.153846,أنثى,4,23,02ae0b47-64a6-47a1-b3c5-c0e4df393c30,6,6,تقليدية,مبتدئ,حضوري,2023-07-16,2023-07-20,5,لا,البكالوريوس,تقني,8,0.0493,0.010417,موظف,دوام كامل,Yes,1
6546,4b6d9a36-4402-4c75-bc3a-fca927dbaf65,0.179487,ذكر,4,23,9b4cedaa-fac0-4eac-aa4b-b05b6a0c97ff,6,6,تقليدية,متوسط,حضوري,2024-01-07,2024-01-18,12,نعم,البكالوريوس,تقني,8,0.0400,0.000000,غير موظف,تدريب,No,0
