# Assignment of Day-81
### Calculate all regression metric values

In [615]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.tree import DecisionTreeRegressor
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score, mean_absolute_percentage_error

In [616]:
# Load the 'iris' example dataset through seaborn and keep it in `df`.
# NOTE(review): sns.load_dataset presumably downloads the data from seaborn's
# online repository when no local cache exists — confirm offline behaviour.
df = sns.load_dataset('iris')
# Preview the first rows to check the columns and their values.
df.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


In [617]:
# Count the missing values in every column before modelling.
df.isna().sum()

sepal_length    0
sepal_width     0
petal_length    0
petal_width     0
species         0
dtype: int64

In [618]:
# Summarise the frame: row count, per-column non-null counts, dtypes and memory use.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   sepal_length  150 non-null    float64
 1   sepal_width   150 non-null    float64
 2   petal_length  150 non-null    float64
 3   petal_width   150 non-null    float64
 4   species       150 non-null    object 
dtypes: float64(4), object(1)
memory usage: 6.0+ KB


In [619]:
# Encode the categorical 'species' column as integer codes so it can be used
# as a numeric target by the regressor below.
le = LabelEncoder()
# Overwrites the string column in place on `df`; `le` keeps the fitted mapping.
df['species'] = le.fit_transform(df['species'])

In [620]:
# Re-inspect the first rows to confirm 'species' now holds integer codes.
df.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0


In [621]:
# Separate the feature matrix (every column except the target) from the target.
y = df['species']
X = df.drop(columns='species')

In [622]:
# Split the data for training and testing (70% train / 30% test).
# A fixed random_state makes the shuffle deterministic, so the split — and
# every metric computed from it — is reproducible on Restart & Run All.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

In [623]:
# Display the number of rows in the full dataset and in each split.
# Plain string literals: the labels contain no placeholders, so the f-prefix
# was unnecessary; the printed output is unchanged.
print('Total Length of Dataset: ', len(df))
print('Total X_Train and y_Train Data: ', len(X_train), '&', len(y_train))
print('Total X_Test and y_Test Data: ', len(X_test), '&', len(y_test))

Total Length of Dataset:  150
Total X_Train and y_Train Data:  105 & 105
Total X_Test and y_Test Data:  45 & 45


In [624]:
# Create the decision-tree regressor and train it on the training split.
# random_state=42 fixes the estimator's seed.
model = DecisionTreeRegressor(
    criterion='friedman_mse',
    random_state=42,
)
model.fit(X_train, y_train)

In [625]:
# Predict the target for the held-out test features with the fitted model.
y_pred = model.predict(X_test)

In [626]:
# Adjusted R-squared on the test split:
#     1 - (1 - R^2) * (n - 1) / (n - p - 1)
# where n is the number of test rows and p the number of feature columns.
len_dataset, p = X_test.shape
r2 = r2_score(y_test, y_pred)
adjusted_r_squared = 1 - ((1 - r2) * (len_dataset - 1)) / (len_dataset - p - 1)

In [627]:
# Evaluate the model: collect every metric for the test split, then print
# them one per line as "<label> <value>".
# NOTE: the encoded target includes the label 0. MAPE divides by |y_true|, so
# (per the scikit-learn docs) it can become arbitrarily large when a sample
# whose true value is 0 is mispredicted — interpret that line with care.
metric_report = [
    ('01 - Simple Mean: ', np.mean(y_test)),
    ('02 - Mean Error: ', np.mean(y_test - y_pred)),
    ('03 - Mean Absolute Error: ', mean_absolute_error(y_test, y_pred)),
    ('04 - Mean Absolute Percentage Error: ', mean_absolute_percentage_error(y_test, y_pred)),
    ('05 - Mean Squared Error: ', mean_squared_error(y_test, y_pred)),
    ('06 - Root Mean Squared Error: ', np.sqrt(mean_squared_error(y_test, y_pred))),
    ('07 - R-Squared: ', r2_score(y_test, y_pred)),
    ('08 - Adjusted R-Squared: ', adjusted_r_squared),
]
for label, value in metric_report:
    print(label, value)

01 - Simple Mean:  1.0444444444444445
02 - Mean Error:  -0.044444444444444446
03 - Mean Absolute Error:  0.044444444444444446
04 - Mean Absolute Percentage Error:  0.044444444444444446
05 - Mean Squared Error:  0.044444444444444446
06 - Root Mean Squared Error:  0.21081851067789195
07 - R-Squared:  0.9410222804718218
08 - Adjusted R-Squared:  0.935124508519004


In [628]:
# Export the fitted decision-tree regressor to a Graphviz DOT file.
# The import is kept in this cell so the cell runs on its own; by convention
# it belongs with the other imports at the top of the notebook.
from sklearn.tree import export_graphviz

export_graphviz(
    model,
    out_file='All_Regression_Values.dot',
    # Hand scikit-learn a plain list of column names instead of a pandas Index.
    # NOTE(review): Index inputs have been problematic for the tree exporters
    # in some scikit-learn releases — confirm against the installed version.
    feature_names=list(X.columns),
    filled=True,
    rounded=True,
)