# Overview 
Prediksi berat pokemon berdasarkan Total,HP,Attack,Defense,Sp_Atk,Sp_Def,Speed

**Machine Learning Step by step :**
1. [x] Memilah dataframe sesuai data yang diperlukan
2. [x] Menggunakan `RandomForestRegressor` sebagai machine learning model
3. [x] Melatih Model dan membuat prediksi
4. [x] Evaluasi model, untuk meningkatkan akurasi model
5. [x] Resolusi

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Langkah 1 : Persiapan Data

## 1.1 Memuat Data

In [2]:
dataframe = pd.read_csv('preprocessed-pokemon-structured.csv')

In [3]:
dataframe.head()

Unnamed: 0,Number,Name,Type_1,Total,HP,Attack,Defense,Sp_Atk,Sp_Def,Speed,...,isLegendary,Color,hasGender,Pr_Male,Egg_Group_1,hasMegaEvolution,Height_m,Weight_kg,Catch_Rate,Body_Style
0,1,Bulbasaur,9,57,17,24,23,41,37,24,...,0,3,1,6,10,0,6,49,5,9
1,2,Ivysaur,9,94,31,37,37,55,50,37,...,0,3,1,6,10,0,10,88,5,9
2,3,Venusaur,9,165,51,56,55,70,66,57,...,0,3,1,6,10,1,20,311,5,9
3,4,Charmander,6,51,11,27,18,36,22,42,...,0,7,1,6,10,0,5,63,5,0
4,5,Charmeleon,6,94,29,39,32,55,37,57,...,0,7,1,6,10,0,11,113,5,0


In [4]:
dataframe.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 721 entries, 0 to 720
Data columns (total 21 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   Number            721 non-null    int64 
 1   Name              721 non-null    object
 2   Type_1            721 non-null    int64 
 3   Total             721 non-null    int64 
 4   HP                721 non-null    int64 
 5   Attack            721 non-null    int64 
 6   Defense           721 non-null    int64 
 7   Sp_Atk            721 non-null    int64 
 8   Sp_Def            721 non-null    int64 
 9   Speed             721 non-null    int64 
 10  Generation        721 non-null    int64 
 11  isLegendary       721 non-null    int64 
 12  Color             721 non-null    int64 
 13  hasGender         721 non-null    int64 
 14  Pr_Male           721 non-null    int64 
 15  Egg_Group_1       721 non-null    int64 
 16  hasMegaEvolution  721 non-null    int64 
 17  Height_m        

## 1.2 Memilah Data

In [5]:
kolom_pilihan = ['Total','HP','Attack','Defense','Sp_Atk','Sp_Def','Speed','Weight_kg']

In [6]:
def seleksiData(df,pilihan):
    df_droped = df.copy()
    for kolom in dataframe:
        if kolom not in pilihan:
            df_droped.drop(kolom,axis=1,inplace=True)
    return df_droped

In [7]:
dataframe = seleksiData(dataframe,kolom_pilihan)

In [8]:
dataframe.head()

Unnamed: 0,Total,HP,Attack,Defense,Sp_Atk,Sp_Def,Speed,Weight_kg
0,57,17,24,23,41,37,24,49
1,94,31,37,37,55,50,37,88
2,165,51,56,55,70,66,57,311
3,51,11,27,18,36,22,42,63
4,94,29,39,32,55,37,57,113


# 1.3 Memisahkan Data

In [9]:
data = dataframe.drop('Weight_kg',axis=1)
label = dataframe['Weight_kg']

In [10]:
from sklearn.model_selection import train_test_split

In [11]:
x_train, x_test, y_train, y_test = train_test_split(data,label,test_size=0.2)

# Langkah 2 : Membuat Machine Learning Model

In [12]:
from sklearn.ensemble import RandomForestRegressor

In [13]:
model = RandomForestRegressor(n_estimators=100)

# Langkah 3 : Melatih Model dan Membuat Prediksi

In [14]:
model.fit(x_train,y_train)

RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=None,
                      max_features='auto', max_leaf_nodes=None,
                      min_impurity_decrease=0.0, min_impurity_split=None,
                      min_samples_leaf=1, min_samples_split=2,
                      min_weight_fraction_leaf=0.0, n_estimators=100,
                      n_jobs=None, oob_score=False, random_state=None,
                      verbose=0, warm_start=False)

In [15]:
prediction = model.predict(x_test)

# Langkah 4 : Evaluasi Model

## 4.1 Evaluasi Matrix Error

In [16]:
from sklearn.metrics import mean_absolute_error,mean_squared_error,r2_score

In [17]:
def evaluate(y_true,y_pred):
    for name,error in {'MAE':mean_absolute_error,'MSE':mean_squared_error,'R2':r2_score}.items():
        print(f'{name} : {error(y_true,y_pred)}')

In [18]:
evaluate(y_test,prediction)

MAE : 59.601180350301036
MSE : 6162.772479872134
R2 : 0.5495447687904227


# 4.2 Meningkatkan Model

In [19]:
from sklearn.model_selection import GridSearchCV,cross_val_score

In [29]:
params = {'n_estimators':[100]}

In [33]:
best_model = GridSearchCV(RandomForestRegressor(),params,cv=5,refit=True,iid=False)

In [34]:
best_model.fit(x_train,y_train)

GridSearchCV(cv=5, error_score='raise-deprecating',
             estimator=RandomForestRegressor(bootstrap=True, criterion='mse',
                                             max_depth=None,
                                             max_features='auto',
                                             max_leaf_nodes=None,
                                             min_impurity_decrease=0.0,
                                             min_impurity_split=None,
                                             min_samples_leaf=1,
                                             min_samples_split=2,
                                             min_weight_fraction_leaf=0.0,
                                             n_estimators='warn', n_jobs=None,
                                             oob_score=False, random_state=None,
                                             verbose=0, warm_start=False),
             iid=False, n_jobs=None, param_grid={'n_estimators': [100]},
             pre_disp

In [35]:
best_model.best_estimator_

RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=None,
                      max_features='auto', max_leaf_nodes=None,
                      min_impurity_decrease=0.0, min_impurity_split=None,
                      min_samples_leaf=1, min_samples_split=2,
                      min_weight_fraction_leaf=0.0, n_estimators=100,
                      n_jobs=None, oob_score=False, random_state=None,
                      verbose=0, warm_start=False)

In [36]:
best_prediction = best_model.predict(x_test)

In [37]:
evaluate(y_test,best_prediction)

MAE : 58.42618084291188
MSE : 6062.000554376961
R2 : 0.5569104862733605


# Resolusi

In [48]:
pd.DataFrame({'Actual':y_test,'Prediction 1':prediction,'Prediction 2':best_prediction})

Unnamed: 0,Actual,Prediction 1,Prediction 2
355,165,257.230000,262.680000
687,166,76.110000,68.855000
710,193,218.910000,215.230000
393,134,129.470000,125.410000
391,238,216.498333,215.642000
...,...,...,...
461,351,207.410000,204.600000
384,7,34.618730,30.612659
334,198,211.002000,212.760000
53,116,55.210000,51.195000
