In [1]:
import numpy as np
import pandas as pd

In [2]:
# we are using regression here, Regression is different from classification
# In classification we have to seperate the data into different categories 
# while in regression we have to use the input data to make prediction for future data
# like predicting the price of stock

In [3]:
# let us get the dataset 

# about the dataset:
# NASA data set, obtained from a series of aerodynamic and acoustic tests of two and three-dimensional airfoil blade sections conducted in an anechoic wind tunnel.
df = pd.read_csv('airfoil_self_noise.dat', sep='\t', header=None)

In [4]:
df.head()

Unnamed: 0,0,1,2,3,4,5
0,800,0.0,0.3048,71.3,0.002663,126.201
1,1000,0.0,0.3048,71.3,0.002663,125.201
2,1250,0.0,0.3048,71.3,0.002663,125.951
3,1600,0.0,0.3048,71.3,0.002663,127.591
4,2000,0.0,0.3048,71.3,0.002663,127.461


In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1503 entries, 0 to 1502
Data columns (total 6 columns):
0    1503 non-null int64
1    1503 non-null float64
2    1503 non-null float64
3    1503 non-null float64
4    1503 non-null float64
5    1503 non-null float64
dtypes: float64(5), int64(1)
memory usage: 70.5 KB


In [6]:
# getting our data ready
data = df[[0,1,2,3,4,5]].values
# we are using the values function whose job is the same as the as_matrix
# the as_matrix() is going to be removed 

In [7]:
# getting our targets ready
target = df[5].values

In [8]:
from sklearn.model_selection import train_test_split

In [9]:
X_train, X_test, y_train, y_test = train_test_split(data, target, test_size=0.33)

In [10]:
# selecting our model
from sklearn.linear_model import LinearRegression

In [11]:
model = LinearRegression()

In [12]:
# training our data
model.fit(X_train, y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None,
         normalize=False)

In [13]:
# here the score is not an accuracy but it is the mean relative mean square error
model.score(X_train, y_train)
# 1 is still good only, and close to one is appreciated 

1.0

In [14]:
model.score(X_test, y_test)

1.0

In [15]:
predictions = model.predict(X_test)

In [16]:
predictions

array([113.004, 118.291, 127.556, 121.771, 133.211, 127.71 , 113.525,
       121.313, 125.683, 135.329, 123.988, 130.541, 112.945, 118.343,
       127.233, 129.119, 131.491, 130.156, 124.106, 134.928, 126.944,
       120.963, 123.312, 110.905, 122.45 , 127.696, 120.015, 112.506,
       132.395, 128.907, 127.315, 124.839, 134.976, 131.236, 124.692,
       136.883, 126.457, 112.93 , 121.485, 127.076, 124.222, 134.568,
       110.364, 118.618, 120.575, 113.796, 128.345, 132.252, 122.966,
       125.094, 119.708, 130.893, 119.039, 130.567, 122.905, 122.088,
       121.225, 130.307, 112.209, 122.175, 110.313, 119.91 , 118.619,
       124.625, 119.854, 127.054, 129.367, 118.994, 132.54 , 134.111,
       130.715, 138.274, 120.458, 133.611, 132.301, 119.146, 128.518,
       113.775, 121.635, 114.569, 132.658, 122.411, 128.401, 135.674,
       115.234, 123.917, 127.006, 125.586, 125.741, 123.303, 122.708,
       117.151, 124.353, 135.328, 121.627, 122.845, 135.234, 129.491,
       103.38 , 127.

In [17]:
# let us use neural networks this time
from sklearn.neural_network import MLPRegressor


In [18]:
from sklearn.preprocessing import StandardScaler

In [19]:
scaler = StandardScaler()

In [20]:
model = MLPRegressor()

In [22]:
model.fit(X_train, y_train)



MLPRegressor(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(100,), learning_rate='constant',
       learning_rate_init=0.001, max_iter=200, momentum=0.9,
       n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5,
       random_state=None, shuffle=True, solver='adam', tol=0.0001,
       validation_fraction=0.1, verbose=False, warm_start=False)

In [23]:
model.score(X_train, y_train)

0.99503811350084

In [24]:
model.score(X_test, y_test)

0.9954242046804798

In [None]:
predictions = model.predict(X_test)