In [3]:
import pandas as pd

# Load DataFrame from CSV file
file_path = r"C:\Users\LENOVO\AppData\Roaming\Microsoft\Windows\Start Menu\Programs\Anaconda3 (64-bit)\Impact_of_Mobile_Phone_on_Students_Health.csv"
df = pd.read_csv(file_path)

# Check for missing values
missing_values = df.isnull().sum()
print("Missing values per column:")
print(missing_values)


Missing values per column:
Names                             0
Age                               0
Gender                            0
Mobile Phone                      0
Mobile Operating System           0
Mobile phone use for education    2
Mobile phone activities           1
Helpful for studying              2
Educational Apps                  2
Daily usages                      1
Performance impact                2
Usage distraction                 2
Attention span                    1
Useful features                   2
Health Risks                      2
Beneficial subject                1
Usage symptoms                    2
Symptom frequency                 1
Health precautions                1
Health rating                     1
dtype: int64


In [5]:
# Drop rows with any missing values
df_cleaned_rows = df.dropna()


In [6]:
# Drop columns with any missing values
df_cleaned_cols = df.dropna(axis=1)


In [7]:
# Fill missing values with a specific value (e.g., 0)
df_filled = df.fillna(value=0)


In [9]:
df_ffill = df.ffill()


In [10]:
df_bfill = df.bfill()


In [11]:
# Displaying results
print("DataFrame after forward fill:")
print(df_ffill.head())

print("\nDataFrame after backward fill:")
print(df_bfill.head())

DataFrame after forward fill:
      Names    Age Gender  Mobile Phone  Mobile Operating System   \
0       Ali  21-25    Male           Yes                  Android   
1     Bilal  21-25    Male           Yes                  Android   
2    Hammad  21-25    Male           Yes                      IOS   
3  Abdullah  21-25    Male           Yes                  Android   
4     Waqar  21-25    Male           Yes                      IOS   

  Mobile phone use for education Mobile phone activities Helpful for studying  \
0                      Sometimes            Social Media                  Yes   
1                      Sometimes            Social Media                  Yes   
2                      Sometimes            All of these                  Yes   
3                     Frequently            All of these                  Yes   
4                     Frequently            All of these                  Yes   

     Educational Apps Daily usages Performance impact      Usage dis

In [13]:
# Convert relevant columns to numeric dtype
numeric_columns = ['Age', 'Daily usages', 'Performance impact']  # Example numeric columns
df[numeric_columns] = df[numeric_columns].apply(pd.to_numeric, errors='coerce')

# Interpolate missing values in numeric columns
df_interpolated = df.interpolate()

# Displaying results
print("DataFrame after interpolation:")
print(df_interpolated.head())

DataFrame after interpolation:
      Names  Age Gender  Mobile Phone  Mobile Operating System   \
0       Ali  NaN    Male           Yes                  Android   
1     Bilal  NaN    Male           Yes                  Android   
2    Hammad  NaN    Male           Yes                      IOS   
3  Abdullah  NaN    Male           Yes                  Android   
4     Waqar  NaN    Male           Yes                      IOS   

  Mobile phone use for education Mobile phone activities Helpful for studying  \
0                      Sometimes            Social Media                  Yes   
1                      Sometimes            Social Media                  Yes   
2                      Sometimes            All of these                  Yes   
3                     Frequently            All of these                  Yes   
4                     Frequently            All of these                  Yes   

     Educational Apps  Daily usages  Performance impact  \
0  Educational Video

  df_interpolated = df.interpolate()


In [14]:

# Convert 'Age' column to float
df['Age'] = df['Age'].astype(float)


In [17]:
# Identify duplicate rows
duplicate_rows = df[df.duplicated()]
print("Duplicate rows:")
print(duplicate_rows)


Duplicate rows:
Empty DataFrame
Columns: [Names, Age, Gender , Mobile Phone , Mobile Operating System , Mobile phone use for education, Mobile phone activities, Helpful for studying, Educational Apps, Daily usages, Performance impact, Usage distraction, Attention span, Useful features, Health Risks, Beneficial subject, Usage symptoms, Symptom frequency, Health precautions, Health rating, Age_normalized]
Index: []

[0 rows x 21 columns]


In [18]:
# Drop duplicate rows
df_unique = df.drop_duplicates()


In [19]:
# Convert 'Names' column values to lowercase
df['Names'] = df['Names'].str.lower()


In [26]:
print(df.columns)


Index(['Names', 'Age', 'Gender ', 'Mobile Phone ', 'Mobile Operating System ',
       'Mobile phone use for education', 'Mobile phone activities',
       'Helpful for studying', 'Educational Apps', 'Daily usages',
       'Performance impact', 'Usage distraction', 'Attention span',
       'Useful features', 'Health Risks', 'Beneficial subject',
       'Usage symptoms', 'Symptom frequency', 'Health precautions',
       'Health rating', 'Age_normalized', 'Age_group'],
      dtype='object')


In [31]:
# Filter rows based on age range 
df_filtered_age = df[(df['Age'] >= 21) & (df['Age'] <= 25)]   
print("Filtered DataFrame based on age range:")
print(df_filtered_age.head())


Filtered DataFrame based on age range:
Empty DataFrame
Columns: [Names, Age, Gender , Mobile Phone , Mobile Operating System , Mobile phone use for education, Mobile phone activities, Helpful for studying, Educational Apps, Daily usages, Performance impact, Usage distraction, Attention span, Useful features, Health Risks, Beneficial subject, Usage symptoms, Symptom frequency, Health precautions, Health rating, Age_normalized, Age_group]
Index: []

[0 rows x 22 columns]


In [34]:
from sklearn.preprocessing import LabelEncoder

 #Label encoding for 'Health Risks' column
encoder = LabelEncoder()
df['Health Risks_encoded'] = encoder.fit_transform(df['Health Risks'])
print("Encoded 'Health Risks' column:")
print(df[['Health Risks', 'Health Risks_encoded']].head())


Encoded 'Health Risks' column:
     Health Risks  Health Risks_encoded
0             Yes                     2
1             Yes                     2
2             Yes                     2
3  Only Partially                     1
4              No                     0


In [36]:
#  Grouping 'Age' into categories
bins = [0, 12, 19, 59, df['Age'].max()]
labels = ['Child', 'Teen', 'Adult', 'Senior']
df['Age_group'] = pd.cut(df['Age'], bins=bins, labels=labels, right=False)
print("DataFrame with 'Age_group' column:")
print(df[['Age', 'Age_group']].head())


DataFrame with 'Age_group' column:
   Age Age_group
0  NaN       NaN
1  NaN       NaN
2  NaN       NaN
3  NaN       NaN
4  NaN       NaN


In [40]:
 Creating a new column 'Total_usage' based on 'Daily usages' and 'Usage distraction'
df['Total_usage'] = df['Daily usages'] - df['Usage distraction']
print("DataFrame with 'Total_usage' column:")
print(df[['Daily usages', 'Usage distraction', 'Total_usage']].head())


DataFrame with 'Total_usage' column:
   Daily usages      Usage distraction Total_usage
0           NaN           During Exams         NaN
1           NaN           During Exams         NaN
2           NaN        Not Distracting         NaN
3           NaN  During Class Lectures         NaN
4           NaN         While Studying         NaN
