In [262]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn
import warnings

from sklearn.preprocessing import LabelEncoder
from sklearn.impute import KNNImputer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import f1_score
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import cross_val_score

warnings.filterwarnings('ignore')


In [263]:
# Load the dataset from the CSV file located next to the notebook,
# then dump the whole frame to stdout for a first look.
df = pd.read_csv('performance.csv')
print(df)

   percentage educational_resources parents_education  personality passion  \
0          85                  good          graduate  extroverted     yes   
1          78          satisfactory     undergraduate   ambiverted      no   
2          92                  poor               PhD  introverted     yes   
3          87                  good          graduate  extroverted      no   
4          80          satisfactory     undergraduate   ambiverted     yes   
5          95                  good               PhD  introverted     yes   
6          88          satisfactory          graduate  extroverted      no   
7          75                  poor     undergraduate   ambiverted      no   
8          89                  good          graduate  extroverted     yes   
9          82          satisfactory     undergraduate  introverted     yes   

   ott_time  sm_time  travel_time  eduvids_time  game_time  extra_time  \
0       2.0      1.0          0.5           3.0        1.0         

In [264]:
# Print a summary of the frame: column names, non-null counts, dtypes and memory usage.
df.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10 entries, 0 to 9
Data columns (total 12 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   percentage             10 non-null     int64  
 1   educational_resources  10 non-null     object 
 2   parents_education      10 non-null     object 
 3   personality            10 non-null     object 
 4   passion                10 non-null     object 
 5   ott_time               10 non-null     float64
 6   sm_time                10 non-null     float64
 7   travel_time            10 non-null     float64
 8   eduvids_time           10 non-null     float64
 9   game_time              10 non-null     float64
 10  extra_time             10 non-null     float64
 11  ai_usage               10 non-null     object 
dtypes: float64(6), int64(1), object(5)
memory usage: 1.1+ KB


In [265]:
# Encode the binary 'passion' column as integers: 'yes' -> 1, any other value -> 0.
# NOTE: this cell is not idempotent -- running it again on the already-encoded
# column compares integers against 'yes' and therefore produces all zeros.
df['passion'] = (df['passion'] == 'yes').astype('int64')
print(df)


   percentage educational_resources parents_education  personality  passion  \
0          85                  good          graduate  extroverted        1   
1          78          satisfactory     undergraduate   ambiverted        0   
2          92                  poor               PhD  introverted        1   
3          87                  good          graduate  extroverted        0   
4          80          satisfactory     undergraduate   ambiverted        1   
5          95                  good               PhD  introverted        1   
6          88          satisfactory          graduate  extroverted        0   
7          75                  poor     undergraduate   ambiverted        0   
8          89                  good          graduate  extroverted        1   
9          82          satisfactory     undergraduate  introverted        1   

   ott_time  sm_time  travel_time  eduvids_time  game_time  extra_time  \
0       2.0      1.0          0.5           3.0        1

In [266]:
# Count the missing entries in each column (isna is the canonical spelling of isnull).
total_null_values = df.isna().sum()
print(total_null_values)

percentage               0
educational_resources    0
parents_education        0
personality              0
passion                  0
ott_time                 0
sm_time                  0
travel_time              0
eduvids_time             0
game_time                0
extra_time               0
ai_usage                 0
dtype: int64


In [267]:
# Ordinal encoding for 'educational_resources'.
mapping_dict1 = {
    'good': 1,
    'poor': 0,
    'satisfactory': 0.5,
}

# Series.map turns any value that is not a key of the dict into NaN,
# so re-running this cell on the already-encoded column blanks it out.
df['educational_resources'] = df['educational_resources'].map(mapping_dict1)


In [268]:
# Numeric encoding for 'personality': extroverted and introverted sit at the
# two ends of the scale, ambiverted in the middle.
mapping_dict2 = {
    'extroverted': 0,
    'introverted': 1,
    'ambiverted': 0.5,
}

# Values missing from the dict become NaN after the lookup.
df['personality'] = df['personality'].map(mapping_dict2)

In [269]:
# Numeric encoding for 'ai_usage': regular -> 0, occasional -> 0.5, never -> 1.
mapping_dict3 = {
    'regular': 0,
    'never': 1,
    'occasional': 0.5,
}

# Values missing from the dict become NaN after the lookup.
df['ai_usage'] = df['ai_usage'].map(mapping_dict3)

In [270]:
# Numeric encoding for 'parents_education', increasing with the level attained.
# NOTE(review): the spacing between levels (0.20, 0.22, 0.25, 0.5, 0.65, 0.75)
# is hand-picked; confirm these weights are intended.
mapping_dict4 = {
    'below10': 0.20,
    '10': 0.22,
    '12': 0.25,
    'undergraduate': 0.5,
    'graduate': 0.65,
    'PhD': 0.75,
}

# Values missing from the dict become NaN after the lookup.
df['parents_education'] = df['parents_education'].map(mapping_dict4)

In [271]:
# Derived feature: start from a fixed budget of 9, subtract the time spent on
# OTT, social media, travel, games and extras, and add the educational-video time.
# NOTE(review): 9 is an unexplained constant (presumably available hours per day)
# and eduvids_time is the only term that is added rather than subtracted --
# confirm both are intended.
df['study_time'] = (
    9
    - df['ott_time']
    - df['sm_time']
    - df['travel_time']
    + df['eduvids_time']
    - df['game_time']
    - df['extra_time']
)

In [272]:
# Print the frame to inspect the result of the encoding and feature-engineering cells.
print(df)

   percentage  educational_resources  parents_education  personality  passion  \
0          85                    1.0               0.65          0.0        1   
1          78                    0.5               0.50          0.5        0   
2          92                    0.0               0.75          1.0        1   
3          87                    1.0               0.65          0.0        0   
4          80                    0.5               0.50          0.5        1   
5          95                    1.0               0.75          1.0        1   
6          88                    0.5               0.65          0.0        0   
7          75                    0.0               0.50          0.5        0   
8          89                    1.0               0.65          0.0        1   
9          82                    0.5               0.50          1.0        1   

   ott_time  sm_time  travel_time  eduvids_time  game_time  extra_time  \
0       2.0      1.0          0.5 

In [273]:
# Names of the feature columns (x) and the target series (y).
# NOTE(review): x is a plain list of column names, not the data itself -- select
# the values with df[x] before handing them to an estimator. The derived
# 'study_time' column is not listed here; confirm that is intentional.
x = [
    'educational_resources',
    'parents_education',
    'personality',
    'passion',
    'ott_time',
    'sm_time',
    'travel_time',
    'eduvids_time',
    'game_time',
    'extra_time',
    'ai_usage',
]
y = df['percentage']  # target variable