Importing Required Libraries


In [9]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score


##Evaluating Model ,Processing and spliting data

In [10]:
def evaluate_model(model, X_test, y_test):
    """
    Evaluate the model using MSE, MAE, and R2 metrics.
    """
    predictions = model.predict(X_test)
    mse = mean_squared_error(y_test, predictions)
    mae = mean_absolute_error(y_test, predictions)
    r2 = r2_score(y_test, predictions)
    return mse, mae, r2

def run_models(data, target_column):
    """
    Preprocess data, split into training and testing sets (80%-20%), train three regression models,
    and print out evaluation metrics.
    """
    # Separate features and target
    X = data.drop(columns=[target_column])
    y = data[target_column]
    
    # Convert categorical variables to dummy variables (if any)
    X = pd.get_dummies(X, drop_first=True)
    
    # Split the dataset into training (80%) and testing (20%)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    
    # Initialize models
    models = {
        'SVR': SVR(kernel='rbf'),
        'Decision Tree': DecisionTreeRegressor(random_state=42),
        'Random Forest': RandomForestRegressor(n_estimators=100, random_state=42)
    }
    
    results = {}
    for name, model in models.items():
        model.fit(X_train, y_train)
        mse, mae, r2 = evaluate_model(model, X_test, y_test)
        results[name] = {'MSE': mse, 'MAE': mae, 'R2': r2}
        print(f"{name} Results:")
        print(f"  MSE: {mse:.4f}")
        print(f"  MAE: {mae:.4f}")
        print(f"  R2: {r2:.4f}")
        print("-" * 30)
        
    return results


###Analyze Dataset 1 – Brain Tumor Dataset

In [11]:
# Load the brain tumor dataset
brain_data = pd.read_csv("brain_tumor_dataset.csv")
print("Brain Tumor Dataset Columns:")
print(brain_data.columns)

target_column = 'Tumor_Size'  

if target_column not in brain_data.columns:
    print(f"Target column '{target_column}' not found. Please update the target column name.")
else:
    print("Running models on Brain Tumor Dataset...")
    brain_results = run_models(brain_data, target_column)


Brain Tumor Dataset Columns:
Index(['Patient_ID', 'Age', 'Gender', 'Tumor_Type', 'Tumor_Size', 'Location',
       'Histology', 'Stage', 'Symptom_1', 'Symptom_2', 'Symptom_3',
       'Radiation_Treatment', 'Surgery_Performed', 'Chemotherapy',
       'Survival_Rate', 'Tumor_Growth_Rate', 'Family_History', 'MRI_Result',
       'Follow_Up_Required'],
      dtype='object')
Running models on Brain Tumor Dataset...
SVR Results:
  MSE: 7.4788
  MAE: 2.3582
  R2: -0.0009
------------------------------
Decision Tree Results:
  MSE: 15.4735
  MAE: 3.2124
  R2: -1.0708
------------------------------
Random Forest Results:
  MSE: 7.7166
  MAE: 2.3836
  R2: -0.0327
------------------------------


###Analyze Dataset 2 – Car Price Prediction Dataset

In [12]:
# Load the car price prediction dataset
car_data = pd.read_csv("car_price_prediction_.csv")
print("Car Price Dataset Columns:")
print(car_data.columns)


target_column = 'Price'  

if target_column not in car_data.columns:
    print(f"Target column '{target_column}' not found. Please update the target column name.")
else:
    print("Running models on Car Price Prediction Dataset...")
    car_results = run_models(car_data, target_column)


Car Price Dataset Columns:
Index(['Car ID', 'Brand', 'Year', 'Engine Size', 'Fuel Type', 'Transmission',
       'Mileage', 'Condition', 'Price', 'Model'],
      dtype='object')
Running models on Car Price Prediction Dataset...
SVR Results:
  MSE: 757546915.9737
  MAE: 23767.2034
  R2: 0.0000
------------------------------
Decision Tree Results:
  MSE: 1460959993.9981
  MAE: 31052.1789
  R2: -0.9285
------------------------------
Random Forest Results:
  MSE: 789932943.6054
  MAE: 24314.5340
  R2: -0.0427
------------------------------


###Analyze Dataset 3 – Grocery Inventory Dataset

In [13]:
# Load the grocery inventory dataset
grocery_data = pd.read_csv("Grocery_Inventory new v1.csv")
print("Grocery Inventory Dataset Columns:")
print(grocery_data.columns)

target_column = 'Inventory_Turnover_Rate'  

if target_column not in grocery_data.columns:
    print(f"Target column '{target_column}' not found. Please update the target column name.")
else:
    print("Running models on Grocery Inventory Dataset...")
    grocery_results = run_models(grocery_data, target_column)


Grocery Inventory Dataset Columns:
Index(['Product_Name', 'Catagory', 'Supplier_Name', 'Warehouse_Location',
       'Status', 'Product_ID', 'Supplier_ID', 'Date_Received',
       'Last_Order_Date', 'Expiration_Date', 'Stock_Quantity', 'Reorder_Level',
       'Reorder_Quantity', 'Unit_Price', 'Sales_Volume',
       'Inventory_Turnover_Rate', 'percentage'],
      dtype='object')
Running models on Grocery Inventory Dataset...
SVR Results:
  MSE: 857.6394
  MAE: 25.3577
  R2: -0.0002
------------------------------
Decision Tree Results:
  MSE: 1547.2273
  MAE: 31.2576
  R2: -0.8045
------------------------------
Random Forest Results:
  MSE: 878.2136
  MAE: 25.4554
  R2: -0.0242
------------------------------


###Comparing Results Across Datasets

In [14]:
print("Brain Tumor Dataset Results:")
print(brain_results)
print("\nCar Price Prediction Dataset Results:")
print(car_results)
print("\nGrocery Inventory Dataset Results:")
print(grocery_results)


Brain Tumor Dataset Results:
{'SVR': {'MSE': 7.478769108715038, 'MAE': 2.358189682977991, 'R2': -0.0008704610454763895}, 'Decision Tree': {'MSE': 15.473505773096123, 'MAE': 3.2123951072422012, 'R2': -1.0707919487795827}, 'Random Forest': {'MSE': 7.716571907728853, 'MAE': 2.383612445304426, 'R2': -0.03269518963744589}}

Car Price Prediction Dataset Results:
{'SVR': {'MSE': 757546915.973701, 'MAE': 23767.20336498886, 'R2': 8.469780201636468e-06}, 'Decision Tree': {'MSE': 1460959993.998114, 'MAE': 31052.17888, 'R2': -0.9285242790676209}, 'Random Forest': {'MSE': 789932943.6053847, 'MAE': 24314.5340382, 'R2': -0.042742352177170595}}

Grocery Inventory Dataset Results:
{'SVR': {'MSE': 857.639420784125, 'MAE': 25.357662666378186, 'R2': -0.00023060595607860002}, 'Decision Tree': {'MSE': 1547.2272727272727, 'MAE': 31.257575757575758, 'R2': -0.8044693784444299}, 'Random Forest': {'MSE': 878.2136318181817, 'MAE': 25.45540404040404, 'R2': -0.024225486637808213}}
