# Finding the accuracy score of a decision tree classifier and verifying the saved model

In [1]:
# Import libraries
import joblib
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split

## Import Dataset

In [2]:
# Read the CSV into a DataFrame and preview its first five rows
df = pd.read_csv(filepath_or_buffer='../datasets/mldata2.csv')
df.head(5)

Unnamed: 0,age,height,weight,gender,likeness
0,27,170.688,76.0,Male,Biryani
1,41,165.0,70.0,Male,Biryani
2,29,171.0,80.0,Male,Biryani
3,27,173.0,102.0,Male,Biryani
4,29,164.0,67.0,Male,Biryani


In [3]:
# Summarise the columns: names, non-null counts and dtypes
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 245 entries, 0 to 244
Data columns (total 5 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   age       245 non-null    int64  
 1   height    245 non-null    float64
 2   weight    245 non-null    float64
 3   gender    245 non-null    object 
 4   likeness  245 non-null    object 
dtypes: float64(2), int64(1), object(2)
memory usage: 9.7+ KB


In [4]:
# Encode 'gender' as integers (Male -> 1, Female -> 0).
# A single dict-based replace handles both labels at once, and the explicit
# astype() guarantees an int64 column instead of relying on replace() silently
# downcasting the object column (that implicit behaviour is deprecated in pandas 2.2).
# Re-running the cell is safe: already-encoded values are left untouched.
# astype() raises if a non-numeric label is still present after the replace.
df['gender'] = df['gender'].replace({'Male': 1, 'Female': 0}).astype('int64')

In [5]:
# Check unique values in 'gender' to confirm only the encoded values (1 and 0) remain
df['gender'].unique()

array([1, 0], dtype=int64)

### Decision Tree Classifier

In [6]:
# Separate the predictor columns (X) from the label to be predicted (y)
X = df.loc[:, ['age', 'height', 'weight', 'gender']]
y = df.loc[:, 'likeness']

In [7]:
# Preview the first rows of the input features
X.head()

Unnamed: 0,age,height,weight,gender
0,27,170.688,76.0,1
1,41,165.0,70.0,1
2,29,171.0,80.0,1
3,27,173.0,102.0,1
4,29,164.0,67.0,1


In [8]:
# Preview the first rows of the target labels
y.head()

0    Biryani
1    Biryani
2    Biryani
3    Biryani
4    Biryani
Name: likeness, dtype: object

In [9]:
# Hold out 20% of the rows for testing and train on the remaining 80%;
# the fixed random_state keeps the split identical across runs
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

In [10]:
# Create a decision tree classifier.
# random_state is fixed because scikit-learn randomly permutes the candidate
# features at each split; without a seed, equally good splits can be chosen
# differently on every run and the reported accuracy is not reproducible.
model = DecisionTreeClassifier(random_state=0)

# Fit the model on the training split
model.fit(X_train, y_train)

# Predict the labels of the held-out test split
y_pred = model.predict(X_test)

# Accuracy: fraction of test rows whose predicted label equals the true label
score = accuracy_score(y_test, y_pred)
score

0.40816326530612246

In [11]:
# Persist the fitted model to disk (joblib file format)
joblib.dump(value=model, filename='dt_foodie.joblib')

['dt_foodie.joblib']

In [12]:
# Load the model back from disk.
# NOTE: joblib.load unpickles the file, so only load files from a trusted source.
loaded_model = joblib.load('dt_foodie.joblib')

# Make predictions with the reloaded model
loaded_pred = loaded_model.predict(X_test)

# The save/load round trip must not change the model: fail loudly if any
# prediction differs from the in-memory model's predictions, rather than
# relying on comparing the two printed scores by eye.
assert np.array_equal(loaded_pred, y_pred), 'Reloaded model predictions differ from the original model'

# Check loaded model score
loaded_score = accuracy_score(y_test, loaded_pred)
loaded_score

0.40816326530612246

Both scores are the same, which means the model was saved and loaded correctly.