## Polinom regresyon

### Data & Import

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns 

In [3]:
# Load the dataset from the CSV file in the working directory, then preview
# the first five rows to sanity-check the parsed columns.
df = pd.read_csv(filepath_or_buffer="community_health_evolved.csv")
df.head(n=5)

Unnamed: 0,yas,cins,irk,kilo,boy,visits,vki
0,47,0,4,112.0,165.0,28,41.14
1,26,1,3,128.0,168.0,23,45.35
2,26,1,2,91.0,173.0,14,30.41
3,39,1,3,108.0,185.0,21,31.56
4,55,1,1,90.0,155.0,27,37.46


In [4]:
# Target: the `visits` column. Features: every remaining column of df.
y = df["visits"]
X = df.drop(columns=["visits"])

## Polynomial Features

In [5]:
from sklearn.preprocessing import PolynomialFeatures

In [6]:
# Interactive lookup of the transformer's signature and parameter docs
# (degree, interaction_only, include_bias, order) before configuring it.
help(PolynomialFeatures)

Help on class PolynomialFeatures in module sklearn.preprocessing._polynomial:

class PolynomialFeatures(sklearn.base.TransformerMixin, sklearn.base.BaseEstimator)
 |  PolynomialFeatures(degree=2, *, interaction_only=False, include_bias=True, order='C')
 |
 |  Generate polynomial and interaction features.
 |
 |  Generate a new feature matrix consisting of all polynomial combinations
 |  of the features with degree less than or equal to the specified degree.
 |  For example, if an input sample is two dimensional and of the form
 |  [a, b], the degree-2 polynomial features are [1, a, b, a^2, ab, b^2].
 |
 |  Read more in the :ref:`User Guide <polynomial_features>`.
 |
 |  Parameters
 |  ----------
 |  degree : int or tuple (min_degree, max_degree), default=2
 |      If a single int is given, it specifies the maximal degree of the
 |      polynomial features. If a tuple `(min_degree, max_degree)` is passed,
 |      then `min_degree` is the minimum and `max_degree` is the maximum
 |      po

In [7]:
# Degree-2 expansion; include_bias=False leaves out the constant "1" column.
poly_conv = PolynomialFeatures(
    degree=2,
    include_bias=False,
)

In [8]:
# Show the configured transformer as the cell output.
poly_conv

In [9]:
# Preview the first rows of the feature frame (the `visits` target is no longer in it).
X.head()

Unnamed: 0,yas,cins,irk,kilo,boy,vki
0,47,0,4,112.0,165.0,41.14
1,26,1,3,128.0,168.0,45.35
2,26,1,2,91.0,173.0,30.41
3,39,1,3,108.0,185.0,31.56
4,55,1,1,90.0,155.0,37.46


In [10]:
# (rows, columns) of the feature frame before the polynomial expansion.
X.shape

(1000, 6)

In [11]:
# Fit the transformer on X and build the expanded feature matrix in one call.
poly_feat = poly_conv.fit_transform(X)

# Equivalent two-step form:
#   poly_conv.fit(X)
#   poly_conv.transform(X)
# For two inputs A and B the degree-2 transform emits the columns
# A, B, A^2, AB, B^2. The leading "1" column is absent here because
# poly_conv was created with include_bias=False.

In [12]:
# 6 original columns + 6 squares + 15 pairwise products = 27 columns.
poly_feat.shape # the column count grew to 27

(1000, 27)

In [13]:
# Display the expanded feature matrix (NumPy abbreviates the middle with "...").
poly_feat

array([[4.7000000e+01, 0.0000000e+00, 4.0000000e+00, ..., 2.7225000e+04,
        6.7881000e+03, 1.6924996e+03],
       [2.6000000e+01, 1.0000000e+00, 3.0000000e+00, ..., 2.8224000e+04,
        7.6188000e+03, 2.0566225e+03],
       [2.6000000e+01, 1.0000000e+00, 2.0000000e+00, ..., 2.9929000e+04,
        5.2609300e+03, 9.2476810e+02],
       ...,
       [5.4000000e+01, 0.0000000e+00, 2.0000000e+00, ..., 3.6100000e+04,
        7.4214000e+03, 1.5256836e+03],
       [3.8000000e+01, 1.0000000e+00, 5.0000000e+00, ..., 3.6100000e+04,
        1.5257000e+03, 6.4480900e+01],
       [6.2000000e+01, 0.0000000e+00, 4.0000000e+00, ..., 2.8900000e+04,
        5.7052000e+03, 1.1262736e+03]])

## Train test split

In [14]:
from sklearn.model_selection import train_test_split

In [15]:
# Hold out 30% of the rows for evaluation; the fixed random_state makes the
# split repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    poly_feat,
    y,
    test_size=0.3,
    random_state=99,
)

In [16]:
from sklearn.linear_model import LinearRegression

In [17]:
# Linear model that will be trained on the polynomial-expanded features.
model = LinearRegression(fit_intercept=True) # will be fitted to the polynomial terms

In [18]:
# Train on the 70% split; the fitted estimator is returned and shown as the cell output.
model.fit(X_train, y_train)

In [19]:
# Predict `visits` for the held-out rows; these predictions feed the metrics below.
poly_pred = model.predict(X_test)

In [20]:
# Display the predicted values for the test split.
poly_pred


array([25.67001348, 16.1699677 , 15.75413195, 11.69511931, 28.75385893,
       12.17613712, 18.39435392, 15.43772241, 26.21788998, 16.02997825,
       13.43504727, 16.91765224, 17.48623931, 13.23458686, 27.59426208,
       11.77376512, 17.64835654, 21.94766145, 23.94809401, 12.6527678 ,
       11.64232456, 31.30749454, 19.32749405, 16.50695901, 21.45493746,
       23.15280438, 21.99543316, 14.23457523, 33.14602378, 16.24974979,
       17.71967737, 28.44865215, 22.64357355, 30.49487597, 27.3618308 ,
       13.13680272, 18.25262428, 20.79016797, 10.9981268 , 24.40646329,
       19.2039664 , 18.25200649, 25.34599596, 20.58808931, 12.950166  ,
       16.63803329, 24.7847692 ,  8.73767166, 19.81601231, 27.49939379,
       20.46525462, 23.81314193, 22.41533911, 16.23706963,  7.99744784,
       17.73563787, 14.91204915, 16.78318678, 30.59279106, 21.76198219,
       21.5606546 , 18.50043829, 12.59251122, 24.89097396, 17.46538439,
       14.5027834 , 19.36927848, 13.78225384, 15.90629909, 36.95

## PERFORMANS METRİKLERİ

In [21]:
from sklearn.metrics import mean_absolute_error,mean_squared_error,r2_score

In [22]:
# Hold-out scores: mean absolute error, R^2, and root mean squared error.
mae = mean_absolute_error(y_true=y_test, y_pred=poly_pred)
r2 = r2_score(y_true=y_test, y_pred=poly_pred)
mse = mean_squared_error(y_true=y_test, y_pred=poly_pred)
rmse = np.sqrt(mse)  # square root puts the error back on the scale of `visits`

In [23]:
# Print each hold-out metric on its own line as "<label> <value>".
for label, value in (("MAE : ", mae), ("R2 : ", r2), ("RMSE :", rmse)):
    print(label, value)

MAE :  2.1497662349710644
R2 :  0.8550543134241528
RMSE : 2.51980680327608


In [24]:
## For this health data, the polynomial regression model turned out not to be a very good choice :/
## NOTE(review): the hold-out R2 printed above is about 0.86 and no baseline is shown;
## compare against a plain linear fit on the raw features before settling on this conclusion.