In [16]:
# Libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import os

import pickle


In [17]:
# Read the training and test CSV files from the dataset folder (train first, then test)
train, test = (
    pd.read_csv(csv_path)
    for csv_path in ('dataset/train.csv', 'dataset/test.csv')
)

In [18]:
# Separate the feature columns from the target column for both datasets
def split_data(train, test, feature_cols=('radius', 'height'), target_col='surface_area'):
    """Split the train and test frames into feature matrices and target vectors.

    Parameters
    ----------
    train, test : pandas.DataFrame
        Frames that each contain every column in ``feature_cols`` plus ``target_col``.
    feature_cols : sequence of str, optional
        Columns used as model inputs. Defaults to ``('radius', 'height')``.
    target_col : str, optional
        Column used as the regression target. Defaults to ``'surface_area'``.

    Returns
    -------
    tuple
        ``(X_train, X_test, y_train, y_test)`` where the ``X`` items are
        DataFrames restricted to ``feature_cols`` and the ``y`` items are the
        ``target_col`` Series.

    Raises
    ------
    KeyError
        If either frame lacks one of the requested columns.
    """
    # pandas needs a list (not a tuple) to select several columns at once
    feature_cols = list(feature_cols)

    # Fail early with a message that names the offending frame and column(s)
    for label, frame in (('train', train), ('test', test)):
        missing = [col for col in feature_cols + [target_col] if col not in frame.columns]
        if missing:
            raise KeyError(f'{label} data is missing required column(s): {missing}')

    X_train = train[feature_cols]
    y_train = train[target_col]

    X_test = test[feature_cols]
    y_test = test[target_col]

    return X_train, X_test, y_train, y_test

# Build the feature matrices and target vectors used by the cells below
X_train, X_test, y_train, y_test = split_data(train=train, test=test)

In [27]:


# A single scaler object; the same instance is handed to every pipeline built below
preprocessor = StandardScaler()

# Candidate regressors, keyed by the label used in the printed report
models = {
    'SVR': SVR(kernel='rbf', C=100, gamma=0.1, epsilon=0.1),
    'Random Forest': RandomForestRegressor(n_estimators=100, max_depth=10, random_state=0),
}

# Evaluation metrics collected per model label
results = {}

for name, model in models.items():
    # Scale the features, then fit the regressor on the training data
    pipe = Pipeline(steps=[('preprocessor', preprocessor), ('model', model)])
    y_pred = pipe.fit(X_train, y_train).predict(X_test)

    # Score the test-set predictions with MSE, MAE and R2 (in that order)
    mse, mae, r2 = (
        scorer(y_test, y_pred)
        for scorer in (mean_squared_error, mean_absolute_error, r2_score)
    )
    results[name] = {'MSE': mse, 'MAE': mae, 'R2': r2}

# Report the three metrics for every model, in insertion order
for name, metrics in results.items():
    print(f'{name} MSE: {metrics["MSE"]}')
    print(f'{name} MAE: {metrics["MAE"]}')
    print(f'{name} R2: {metrics["R2"]}')


SVR MSE: 5.989687009089831
SVR MAE: 0.93654201000233
SVR R2: 0.9999323010568322
Random Forest MSE: 22.54839728255437
Random Forest MAE: 3.214965360401975
Random Forest R2: 0.9997451448358085


In [28]:
# Rank the models with a single composite score where LOWER is better.
# MSE and MAE are errors (smaller is better) while R2 is a goodness-of-fit
# score (larger is better). R2 therefore enters the composite as the loss
# 1 - R2; adding raw R2 to the errors and then minimising would penalise
# the model with the better fit.
composite_scores = {}
for name, metrics in results.items():
    mse = metrics['MSE']
    mae = metrics['MAE']
    r2 = metrics['R2']

    # Unweighted mean of the three loss terms.
    # NOTE: the terms are on different scales (MSE is in squared target units,
    # MAE in target units, 1 - R2 is unitless), so the largest-magnitude term
    # dominates. Switch to a weighted mean here if that is not desired.
    composite_score = (mse + mae + (1.0 - r2)) / 3.0

    composite_scores[name] = composite_score

# The best model is the one with the smallest composite loss
best_model = min(composite_scores, key=composite_scores.get)

print('Best model based on composite score:', best_model)


Best model based on composite score: SVR


In [30]:
import joblib

# `pipe` is only the LAST pipeline fitted by the training loop (the final
# entry of `models`), which is not necessarily the model selected above.
# Rebuild and refit the pipeline for the model named by `best_model` so the
# saved file actually contains the selected model.
best_pipeline = Pipeline([
    ('preprocessor', StandardScaler()),
    ('model', models[best_model]),
])
best_pipeline.fit(X_train, y_train)

# Save the model to a file
joblib.dump(best_pipeline, 'best_model.pkl')



['best_model.pkl']