In [11]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

def process_and_model(dataset, target_column, dataset_name):
    """Train three regressors on ``dataset`` and report held-out metrics.

    Rows containing any missing value are dropped, every object-dtype column
    is label-encoded, and the data is split 80/20 with a fixed seed. The SVR
    is fitted on standardized features; the decision tree and the random
    forest are fitted on the unscaled features. MSE, MAE and R2 on the test
    split are printed and returned.

    Args:
        dataset: pandas DataFrame holding both the features and the target.
        target_column: Name of the column to predict.
        dataset_name: Label used in the printed header.

    Returns:
        dict mapping each model name to ``{'MSE': ..., 'MAE': ..., 'R2': ...}``.

    Raises:
        KeyError: If ``target_column`` is not a column of ``dataset``.
        ValueError: If no rows remain after dropping missing values.
    """
    print(f"\n\n--- Dataset: {dataset_name} ---")

    # Fail early with a readable message instead of an error raised from
    # deep inside pandas / scikit-learn.
    if target_column not in dataset.columns:
        raise KeyError(
            f"target column {target_column!r} not found in {dataset_name!r}"
        )

    # Work on a private copy so the caller's frame is never modified.
    dataset = dataset.dropna().copy()
    if dataset.empty:
        raise ValueError(
            f"dataset {dataset_name!r} has no rows left after dropping "
            "missing values"
        )

    # Encode text columns as integer codes. Plain column assignment replaces
    # the column, so it takes the encoder's integer dtype; ``.loc[:, col] =``
    # would write the codes into the existing object-dtype column instead.
    # NOTE(review): an object-dtype target is encoded here as well, which
    # turns it into arbitrary ordinal codes -- confirm that is intended for
    # each dataset's target.
    for column in dataset.select_dtypes(include=['object']).columns:
        dataset[column] = LabelEncoder().fit_transform(dataset[column])

    X = dataset.drop(target_column, axis=1)
    y = dataset[target_column]

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    # The scaler is fitted on the training split only, then applied to both.
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Each entry is (estimator, use_scaled_features). Only the SVR is given
    # standardized inputs; the tree-based models get the raw features.
    models = {
        'Support Vector Machine': (SVR(), True),
        'Decision Tree': (DecisionTreeRegressor(random_state=42), False),
        'Random Forest': (
            RandomForestRegressor(random_state=42, n_estimators=100),
            False,
        ),
    }

    results = {}
    for model_name, (model, use_scaled) in models.items():
        if use_scaled:
            train_features, test_features = X_train_scaled, X_test_scaled
        else:
            train_features, test_features = X_train, X_test

        model.fit(train_features, y_train)
        y_pred = model.predict(test_features)

        results[model_name] = {
            'MSE': mean_squared_error(y_test, y_pred),
            'MAE': mean_absolute_error(y_test, y_pred),
            'R2': r2_score(y_test, y_pred),
        }

    for model_name, metrics in results.items():
        print(f"\nModel: {model_name}")
        print(f"MSE: {metrics['MSE']:.4f}")
        print(f"MAE: {metrics['MAE']:.4f}")
        print(f"R2 Score: {metrics['R2']:.4f}")

    return results



# Load the three source CSV files from the local Downloads folder.
grocery_df = pd.read_csv(
    r'C:\Users\lenovo\Downloads\Downloads/Grocery_Inventory new v1.csv'
)
# The life-expectancy file is semicolon-separated; lines that cannot be
# parsed are skipped rather than raising.
life_expectancy_df = pd.read_csv(
    r'C:\Users\lenovo\Downloads\Downloads/life_expectancy_dataset.csv',
    sep=';',
    on_bad_lines='skip',
)
depression_df = pd.read_csv(
    r'C:\Users\lenovo\Downloads\Downloads/student_depression_dataset.csv'
)

# Show the available columns of each frame before modelling.
print("Grocery Columns:", list(grocery_df.columns))
print("Life Expectancy Columns:", list(life_expectancy_df.columns))
print("Depression Columns:", list(depression_df.columns))

# Run the shared modelling routine once per dataset, each with its own target.
grocery_results = process_and_model(
    grocery_df, 'Unit_Price', 'Grocery Inventory'
)
# 'life_expectancy_men' can be used as the target here instead.
life_expectancy_results = process_and_model(
    life_expectancy_df, 'life_expectancy_women', 'Life Expectancy'
)
depression_results = process_and_model(
    depression_df, 'Depression', 'Student Depression'
)


Grocery Columns: ['Product_Name', 'Catagory', 'Supplier_Name', 'Warehouse_Location', 'Status', 'Product_ID', 'Supplier_ID', 'Date_Received', 'Last_Order_Date', 'Expiration_Date', 'Stock_Quantity', 'Reorder_Level', 'Reorder_Quantity', 'Unit_Price', 'Sales_Volume', 'Inventory_Turnover_Rate', 'percentage']
Life Expectancy Columns: ['country_code', 'country_name', 'region', 'sub-region', 'intermediate-region', 'year', 'life_expectancy_women', 'life_expectancy_men']
Depression Columns: ['id', 'Gender', 'Age', 'City', 'Profession', 'Academic Pressure', 'Work Pressure', 'CGPA', 'Study Satisfaction', 'Job Satisfaction', 'Sleep Duration', 'Dietary Habits', 'Degree', 'Have you ever had suicidal thoughts ?', 'Work/Study Hours', 'Financial Stress', 'Family History of Mental Illness', 'Depression']


--- Dataset: Grocery Inventory ---

Model: Support Vector Machine
MSE: 997.5230
MAE: 27.3870
R2 Score: -0.0081

Model: Decision Tree
MSE: 128.9141
MAE: 4.3586
R2 Score: 0.8697

Model: Random Forest
MSE