In [1]:
# Import our dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd
import tensorflow as tf
from tensorflow.keras.layers import Dense
from keras.callbacks import ModelCheckpoint
import numpy as np
#  Import and read the charity_data.csv.
import pandas as pd 
student_df  = pd.read_csv('Resources/students_adaptability.csv')
student_df.head()

Unnamed: 0,Gender,Age,Education Level,Institution Type,IT Student,Location,Load-shedding,Financial Condition,Internet Type,Network Type,Class Duration,Self Lms,Device,Adaptivity Level
0,Boy,21-25,University,Non Government,No,Yes,Low,Mid,Wifi,4G,3-6,No,Tab,Moderate
1,Girl,21-25,University,Non Government,No,Yes,High,Mid,Mobile Data,4G,1-3,Yes,Mobile,Moderate
2,Girl,16-20,College,Government,No,Yes,Low,Mid,Wifi,4G,1-3,No,Mobile,Moderate
3,Girl,11-15,School,Non Government,No,Yes,Low,Mid,Mobile Data,4G,1-3,No,Mobile,Moderate
4,Girl,16-20,School,Non Government,No,Yes,Low,Poor,Mobile Data,3G,0,No,Mobile,Low


In [2]:

student_df.drop(['Location','IT Student'], axis=1, inplace=True)
student_df.head()

Unnamed: 0,Gender,Age,Education Level,Institution Type,Load-shedding,Financial Condition,Internet Type,Network Type,Class Duration,Self Lms,Device,Adaptivity Level
0,Boy,21-25,University,Non Government,Low,Mid,Wifi,4G,3-6,No,Tab,Moderate
1,Girl,21-25,University,Non Government,High,Mid,Mobile Data,4G,1-3,Yes,Mobile,Moderate
2,Girl,16-20,College,Government,Low,Mid,Wifi,4G,1-3,No,Mobile,Moderate
3,Girl,11-15,School,Non Government,Low,Mid,Mobile Data,4G,1-3,No,Mobile,Moderate
4,Girl,16-20,School,Non Government,Low,Poor,Mobile Data,3G,0,No,Mobile,Low


In [3]:
# Determine the number of unique values in each column.
unique_value_counts = student_df.nunique()
print(unique_value_counts)

Gender                 2
Age                    6
Education Level        3
Institution Type       2
Load-shedding          2
Financial Condition    3
Internet Type          2
Network Type           3
Class Duration         3
Self Lms               2
Device                 3
Adaptivity Level       3
dtype: int64


In [4]:
# Define a function to convert the 'Class Duration' to hours
def convert_to_hours(duration_str):
    # Split the string by '-'
    parts = duration_str.split('-')
    
    # Check if it's in the format 'X-Mar' (e.g., 6-Mar)
    if len(parts) == 2:
        return int(parts[0]) * 30  # Assuming 1 month = 30 days (approx.)
    
    # Check if it's in the format 'X-Jan' (e.g., 3-Jan)
    elif len(parts) == 2 and parts[1].isdigit():
        return int(parts[0])  # Number of days
    
    # If the format is not recognized, return None
    else:
        return 0

# Apply the function to the 'Class Duration' column
student_df['Class Duration (hours)'] = student_df['Class Duration'].apply(convert_to_hours)

# Print the updated DataFrame
student_df.head()

Unnamed: 0,Gender,Age,Education Level,Institution Type,Load-shedding,Financial Condition,Internet Type,Network Type,Class Duration,Self Lms,Device,Adaptivity Level,Class Duration (hours)
0,Boy,21-25,University,Non Government,Low,Mid,Wifi,4G,3-6,No,Tab,Moderate,90
1,Girl,21-25,University,Non Government,High,Mid,Mobile Data,4G,1-3,Yes,Mobile,Moderate,30
2,Girl,16-20,College,Government,Low,Mid,Wifi,4G,1-3,No,Mobile,Moderate,30
3,Girl,11-15,School,Non Government,Low,Mid,Mobile Data,4G,1-3,No,Mobile,Moderate,30
4,Girl,16-20,School,Non Government,Low,Poor,Mobile Data,3G,0,No,Mobile,Low,0


In [5]:
# Determine the number of unique values in each column.
unique_value_counts = student_df.nunique()
print(unique_value_counts)

Gender                    2
Age                       6
Education Level           3
Institution Type          2
Load-shedding             2
Financial Condition       3
Internet Type             2
Network Type              3
Class Duration            3
Self Lms                  2
Device                    3
Adaptivity Level          3
Class Duration (hours)    3
dtype: int64


In [6]:

# Define a function to convert the date format
def convert_date_format(date_str):
    parts = date_str.split('-')
    if len(parts) == 2 and parts[0].isdigit() and parts[1].isalpha():
        day = parts[0]
        month = parts[1]
        return f"{day} to {month}"
    else:
        return date_str  # Return unchanged if the format is not recognized

# Apply the conversion function to the 'Age' column
student_df['Age'] = student_df['Age'].apply(convert_date_format)

# Print the updated DataFrame
student_df.head()

Unnamed: 0,Gender,Age,Education Level,Institution Type,Load-shedding,Financial Condition,Internet Type,Network Type,Class Duration,Self Lms,Device,Adaptivity Level,Class Duration (hours)
0,Boy,21-25,University,Non Government,Low,Mid,Wifi,4G,3-6,No,Tab,Moderate,90
1,Girl,21-25,University,Non Government,High,Mid,Mobile Data,4G,1-3,Yes,Mobile,Moderate,30
2,Girl,16-20,College,Government,Low,Mid,Wifi,4G,1-3,No,Mobile,Moderate,30
3,Girl,11-15,School,Non Government,Low,Mid,Mobile Data,4G,1-3,No,Mobile,Moderate,30
4,Girl,16-20,School,Non Government,Low,Poor,Mobile Data,3G,0,No,Mobile,Low,0


In [7]:
# Look at Age value counts for binning
age_counts = student_df['Age'].value_counts()
age_counts

Age
21-25    374
11-15    353
16-20    278
1-5       81
26-30     68
6-10      51
Name: count, dtype: int64

In [8]:

# Sample data
age_data = student_df['Age']

# Create a DataFrame
df = pd.DataFrame(age_data)

# Define bin edges and labels
bins = [5,10,15, 25, 30]
labels = ['Child', 'Teenager', 'Young Adult', 'Adult']

# Perform binning using pd.cut
student_df['Age Category'] = pd.cut(df['Age'].str.split('-').str[0].astype(int), bins=bins, labels=labels, right=False)

# Print the updated DataFrame
student_df.head()

student_df = student_df[student_df["Age Category"].notna()]

student_df


Unnamed: 0,Gender,Age,Education Level,Institution Type,Load-shedding,Financial Condition,Internet Type,Network Type,Class Duration,Self Lms,Device,Adaptivity Level,Class Duration (hours),Age Category
0,Boy,21-25,University,Non Government,Low,Mid,Wifi,4G,3-6,No,Tab,Moderate,90,Young Adult
1,Girl,21-25,University,Non Government,High,Mid,Mobile Data,4G,1-3,Yes,Mobile,Moderate,30,Young Adult
2,Girl,16-20,College,Government,Low,Mid,Wifi,4G,1-3,No,Mobile,Moderate,30,Young Adult
3,Girl,11-15,School,Non Government,Low,Mid,Mobile Data,4G,1-3,No,Mobile,Moderate,30,Teenager
4,Girl,16-20,School,Non Government,Low,Poor,Mobile Data,3G,0,No,Mobile,Low,0,Young Adult
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1200,Girl,16-20,College,Non Government,Low,Mid,Wifi,4G,1-3,No,Mobile,Low,30,Young Adult
1201,Girl,16-20,College,Non Government,High,Mid,Wifi,4G,3-6,No,Mobile,Moderate,90,Young Adult
1202,Boy,11-15,School,Non Government,Low,Mid,Mobile Data,3G,1-3,No,Mobile,Moderate,30,Teenager
1203,Girl,16-20,College,Non Government,Low,Mid,Wifi,4G,1-3,No,Mobile,Low,30,Young Adult


In [9]:
student_df.drop(['Age','Class Duration'], axis=1, inplace=True)

In [10]:
# Create a DataFrame

# Check for null values in the entire DataFrame
null_values = student_df.isnull().sum()
print("Null values in the entire DataFrame:")
print(null_values)

Null values in the entire DataFrame:
Gender                    0
Education Level           0
Institution Type          0
Load-shedding             0
Financial Condition       0
Internet Type             0
Network Type              0
Self Lms                  0
Device                    0
Adaptivity Level          0
Class Duration (hours)    0
Age Category              0
dtype: int64


In [11]:
cat_df = pd.get_dummies(student_df)
cat_df

Unnamed: 0,Class Duration (hours),Gender_Boy,Gender_Girl,Education Level_College,Education Level_School,Education Level_University,Institution Type_Government,Institution Type_Non Government,Load-shedding_High,Load-shedding_Low,...,Device_Computer,Device_Mobile,Device_Tab,Adaptivity Level_High,Adaptivity Level_Low,Adaptivity Level_Moderate,Age Category_Child,Age Category_Teenager,Age Category_Young Adult,Age Category_Adult
0,90,True,False,False,False,True,False,True,False,True,...,False,False,True,False,False,True,False,False,True,False
1,30,False,True,False,False,True,False,True,True,False,...,False,True,False,False,False,True,False,False,True,False
2,30,False,True,True,False,False,True,False,False,True,...,False,True,False,False,False,True,False,False,True,False
3,30,False,True,False,True,False,False,True,False,True,...,False,True,False,False,False,True,False,True,False,False
4,0,False,True,False,True,False,False,True,False,True,...,False,True,False,False,True,False,False,False,True,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1200,30,False,True,True,False,False,False,True,False,True,...,False,True,False,False,True,False,False,False,True,False
1201,90,False,True,True,False,False,False,True,True,False,...,False,True,False,False,False,True,False,False,True,False
1202,30,True,False,False,True,False,False,True,False,True,...,False,True,False,False,False,True,False,True,False,False
1203,30,False,True,True,False,False,False,True,False,True,...,False,True,False,False,True,False,False,False,True,False


In [12]:
# Define your features (X) and target (y)
X = student_df.drop(columns=['Adaptivity Level'],axis=1)
y = student_df['Adaptivity Level']

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=70)

In [13]:
from sklearn.preprocessing import MinMaxScaler

# Create a MinMaxScaler instance
scaler = MinMaxScaler()

# Scale the 'Class Duration (hours)' column in student_df
student_df[['Class Duration (hours)']] = scaler.fit_transform(student_df[['Class Duration (hours)']])


In [14]:
ordinal_mapping = {
    'Low': 1,
    'Mid': 2,
    'High': 3
}
network_mapping = {
    '2G': 1,
    '3G': 2,
    '4G': 3
}
financial_mapping = {
    'Poor': 1,
    'Mid': 2,
    'Rich': 3
}
student_df['Financial Condition'] = student_df['Financial Condition'].map(financial_mapping)
student_df['Network Type'] = student_df['Network Type'].map(network_mapping)
student_df['Adaptivity Level'] = student_df['Adaptivity Level'].map(ordinal_mapping)


In [15]:
df_encoded = pd.get_dummies(student_df, columns=['Gender', 'Education Level', 'Institution Type','Internet Type'])
df_encoded

Unnamed: 0,Load-shedding,Financial Condition,Network Type,Self Lms,Device,Adaptivity Level,Class Duration (hours),Age Category,Gender_Boy,Gender_Girl,Education Level_College,Education Level_School,Education Level_University,Institution Type_Government,Institution Type_Non Government,Internet Type_Mobile Data,Internet Type_Wifi
0,Low,2,3,No,Tab,,1.000000,Young Adult,True,False,False,False,True,False,True,False,True
1,High,2,3,Yes,Mobile,,0.333333,Young Adult,False,True,False,False,True,False,True,True,False
2,Low,2,3,No,Mobile,,0.333333,Young Adult,False,True,True,False,False,True,False,False,True
3,Low,2,3,No,Mobile,,0.333333,Teenager,False,True,False,True,False,False,True,True,False
4,Low,1,2,No,Mobile,1.0,0.000000,Young Adult,False,True,False,True,False,False,True,True,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1200,Low,2,3,No,Mobile,1.0,0.333333,Young Adult,False,True,True,False,False,False,True,False,True
1201,High,2,3,No,Mobile,,1.000000,Young Adult,False,True,True,False,False,False,True,False,True
1202,Low,2,2,No,Mobile,,0.333333,Teenager,True,False,False,True,False,False,True,True,False
1203,Low,2,3,No,Mobile,1.0,0.333333,Young Adult,False,True,True,False,False,False,True,False,True


In [16]:
# #Create a StandardScaler instances
# scaler = StandardScaler()

# # Fit the StandardScaler
# X_scaler = scaler.fit(X_train)

# # Scale the data
# X_train_scaled = X_scaler.transform(X_train)
# X_test_scaled = X_scaler.transform(X_test)

ValueError: could not convert string to float: 'Boy'