In [1]:
import numpy as np
import pandas as pd

In [2]:
from sklearn.preprocessing import OrdinalEncoder
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer

In [3]:
df = pd.read_csv('Students Social Media Addiction.csv')

In [4]:
df.isnull().sum()

Student_ID                      0
Age                             0
Gender                          0
Academic_Level                  0
Country                         0
Avg_Daily_Usage_Hours           0
Most_Used_Platform              0
Affects_Academic_Performance    0
Sleep_Hours_Per_Night           0
Mental_Health_Score             0
Relationship_Status             0
Conflicts_Over_Social_Media     0
Addicted_Score                  0
dtype: int64

In [5]:
df.sample(5)

Unnamed: 0,Student_ID,Age,Gender,Academic_Level,Country,Avg_Daily_Usage_Hours,Most_Used_Platform,Affects_Academic_Performance,Sleep_Hours_Per_Night,Mental_Health_Score,Relationship_Status,Conflicts_Over_Social_Media,Addicted_Score
32,33,18,Male,High School,Indonesia,5.4,TikTok,Yes,5.4,5,Complicated,4,8
592,593,21,Female,Undergraduate,Turkey,6.9,TikTok,Yes,6.1,5,Single,4,8
164,165,19,Female,Undergraduate,Sri Lanka,5.1,Facebook,No,7.0,7,Single,2,5
175,176,22,Male,Graduate,Pakistan,4.5,Instagram,Yes,6.7,5,In Relationship,3,7
472,473,19,Female,Undergraduate,Ireland,3.4,Instagram,Yes,8.5,7,Single,3,6


In [6]:
transformer = ColumnTransformer(transformers=[
  ('tnf1', OneHotEncoder(drop='first', sparse_output=False, handle_unknown='ignore'), ['Gender', 'Country', 'Most_Used_Platform', 'Affects_Academic_Performance', 'Relationship_Status']),
  ('tnf2', OrdinalEncoder(categories=[['High School', 'Undergraduate', 'Graduate']]), ['Academic_Level'])
], remainder='passthrough')

In [7]:
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(df.drop(columns=['Addicted_Score']), df['Addicted_Score'], test_size=0.2)

In [8]:
x_train.sample(5)

Unnamed: 0,Student_ID,Age,Gender,Academic_Level,Country,Avg_Daily_Usage_Hours,Most_Used_Platform,Affects_Academic_Performance,Sleep_Hours_Per_Night,Mental_Health_Score,Relationship_Status,Conflicts_Over_Social_Media
79,80,18,Female,High School,Armenia,5.9,Instagram,Yes,5.2,5,Complicated,4
389,390,21,Male,Graduate,Canada,4.7,Instagram,Yes,7.5,6,Single,3
689,690,23,Male,Graduate,Spain,6.5,Facebook,Yes,6.1,5,Single,4
468,469,19,Female,Undergraduate,France,3.1,Instagram,No,8.9,7,In Relationship,2
454,455,20,Female,Undergraduate,Switzerland,2.6,Instagram,No,9.2,8,Single,2


In [9]:
x_train_transform = pd.DataFrame(transformer.fit_transform(x_train))
x_test_transform = pd.DataFrame(transformer.transform(x_test))



In [10]:
x_train_transform.sample(5)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,105,106,107,108,109,110,111,112,113,114
448,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,2.0,186.0,21.0,4.9,7.8,7.0,2.0
483,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,2.0,548.0,22.0,3.8,8.3,7.0,2.0
316,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,1.0,239.0,20.0,4.6,7.3,7.0,2.0
151,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,1.0,0.0,2.0,635.0,22.0,5.7,6.7,6.0,3.0
24,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,2.0,105.0,22.0,2.9,7.0,7.0,2.0


In [11]:
df['Country'].value_counts()

Country
India        53
USA          40
Canada       34
Denmark      27
Ireland      27
             ..
Uruguay       1
Ecuador       1
Venezuela     1
Peru          1
Jamaica       1
Name: count, Length: 110, dtype: int64