# **Random Forest - Classification**

In [30]:
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier

In [33]:
cancer=load_breast_cancer()

df = pd.DataFrame(data=cancer.data, columns=cancer.feature_names) 
df["class"] = cancer.target 
df.head()

Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,...,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension,class
0,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,0.2419,0.07871,...,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189,0
1,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,0.1812,0.05667,...,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902,0
2,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,0.2069,0.05999,...,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758,0
3,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,0.2597,0.09744,...,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173,0
4,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,0.1809,0.05883,...,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678,0


In [34]:
X_train, X_test, y_train, y_test = train_test_split(df.iloc[:,:-1], df.iloc[:,-1],test_size=0.3,random_state=100)

In [35]:
model = RandomForestClassifier(n_estimators = 30, max_depth=5, random_state = 42)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

print(accuracy_score(y_test, y_pred))

0.935672514619883


In [37]:
# n_estimators : The number of trees in the forest
# max_depth : The maximum depth of the tree

model = RandomForestClassifier(n_estimators = 100, max_depth=5, random_state = 42)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

print(accuracy_score(y_test, y_pred))

0.9532163742690059


In [38]:
model = RandomForestClassifier(n_estimators = 100, max_depth=7, random_state = 42)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

print(accuracy_score(y_test, y_pred))

0.9590643274853801


In [39]:
model = RandomForestClassifier(n_estimators = 100, max_depth=10, random_state = 42)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

print(accuracy_score(y_test, y_pred))

0.9532163742690059


# **Random Forest - Regression**

In [None]:
import numpy as np
import random
from sklearn.metrics import mean_absolute_error
from sklearn.ensemble import RandomForestRegressor 

In [40]:
def randomGenerator(num_len, num_min, num_max):
    result = []
    for i in range(num_len):
        result.append(random.randint(num_min, num_max))
    return result
    
roof = randomGenerator(5000, 0, 1)
yard = randomGenerator(5000, 0, 10)
bathroom = randomGenerator(5000, 1, 3)
livingroom = randomGenerator(5000, 0, 3)
room = randomGenerator(5000, 1, 10)

df = pd.DataFrame({'roof' : roof, 'yard' : yard, 'bathroom' : bathroom, 'livingroom' : livingroom, 'room' : room})

df['price'] = df['roof'] * 1000 + df['yard'] * 100 + df['bathroom'] * 500 + df['livingroom'] * 800 + df['room'] * 300

df.head()

Unnamed: 0,roof,yard,bathroom,livingroom,room,price
0,0,9,3,3,6,6600
1,1,6,3,2,2,5300
2,1,1,3,2,8,6600
3,0,3,2,1,10,5100
4,1,3,3,3,7,7300


In [None]:
X = df[['roof', 'yard', 'bathroom', 'livingroom', 'room']]
y = df['price']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3)

In [41]:
model = RandomForestRegressor(n_estimators = 10, random_state = 42)
model.fit(X = X_train, y = y_train)
y_pred = model.predict(X_test)

result = pd.DataFrame({'pred' : y_pred, 'real' : y_test})
result.head()

Unnamed: 0,pred,real
400,0.0,0
225,0.4,1
321,0.0,0
173,1.0,1
506,1.0,1


In [42]:
mse = mean_absolute_error(y_test, y_pred)
print('mse: ', mse)
rmse = (np.sqrt(mse))
print('rmse: ', rmse)

mse:  0.07485380116959064
rmse:  0.27359422722270776


# **퀴즈 1**

*   make_moons 데이터에 대하여, 서로 다른 $test$ 정확도를 가지는 모델을 두 개 이상 생성하고 그 결과를 출력하시오.





In [31]:
from sklearn.datasets import make_moons
X, y = make_moons(n_samples=100, noise=0.26, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

In [32]:
# Random Forest - Classficiation
model = RandomForestClassifier(n_estimators = 30, max_depth=5, random_state = 42)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

print(accuracy_score(y_test, y_pred))

model = RandomForestClassifier(n_estimators = 100, max_depth=5, random_state = 42)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

print(accuracy_score(y_test, y_pred))

0.88
0.92
