# In [None]:

# ------------------------------
# DATA PREPROCESSING SCRIPT
# ------------------------------

# Step 1: Import required libraries
import pandas as pd
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from sklearn.model_selection import train_test_split

# Step 2: Read the raw CSV into a DataFrame
file_path = '/mnt/data/20230329093832Mobile-Addiction-.csv'
df = pd.read_csv(file_path)

# Step 3: Preview the data (first rows, then the column index)
print("First 5 rows of the dataset:", df.head(), sep="\n")
print("\nDataset Columns:", df.columns, sep="\n")

# Step 4: Remove exact duplicate rows and report how many were dropped
initial_shape = df.shape
df = df.drop_duplicates(keep="first")
duplicates_removed = initial_shape[0] - len(df)
print(f"\nRemoved {duplicates_removed} duplicate rows.")

# Step 5: Report missing values per column, then discard incomplete rows
missing_values = df.isna().sum()
print("\nMissing Values in each column:", missing_values, sep="\n")

# A row is removed as soon as any one of its cells is missing
df = df.dropna(axis=0, how="any")
print(f"\nAfter dropping missing values, dataset shape: {df.shape}")

# Step 6: Encode categorical variables (Label Encoding)
# Replace 'Addiction_Level' with your actual target column
target_column = 'Addiction_Level'

# Fail fast, before any column is transformed, if the target is missing.
if target_column not in df.columns:
    raise KeyError(
        f"Target column {target_column!r} not found in dataset columns: "
        f"{list(df.columns)}"
    )

# Work on an explicit copy so the column assignments below touch this frame only.
df = df.copy()

categorical_cols = df.select_dtypes(include=['object']).columns
label_encoders = {}

for col in categorical_cols:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    # Keep each fitted encoder so the integer codes can be mapped back later.
    label_encoders[col] = le

print("\nLabel Encoding applied to categorical columns.")

# Step 7: Splitting dataset into X and y
# The target is separated BEFORE scaling so its values are never rescaled;
# an object-typed target has already been label-encoded in the loop above.
X = df.drop(columns=[target_column])
y = df[target_column]

# Step 8: Split into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Independent copies: the scaled values written below must not reach X or df.
X_train = X_train.copy()
X_test = X_test.copy()

print("\nData successfully split into Training and Testing sets.")
print(f"Training set size: {X_train.shape}")
print(f"Testing set size: {X_test.shape}")

# Step 9: Normalize numeric features
# 'number' selects every numeric dtype, not only int64/float64.
numeric_cols = X.select_dtypes(include='number').columns
scaler = MinMaxScaler()

# Fit on the training rows only, then apply the same min/max to the test rows,
# so no test-set statistics leak into training. Test values may therefore fall
# slightly outside [0, 1].
X_train[numeric_cols] = scaler.fit_transform(X_train[numeric_cols])
X_test[numeric_cols] = scaler.transform(X_test[numeric_cols])

print("\nNumerical columns normalized using MinMaxScaler.")

# Step 10: Final message
print("\n✅ Data Preprocessing Completed Successfully!")
