1. Take one of the supervised learning models you have built recently and apply at least
three dimensionality reduction techniques to it (separately). Be sure to create a short
summary of each technique you use. Indicate how each changed the model
performance. Reference:
https://machinelearningmastery.com/dimensionality-reduction-algorithms-with-python/

In [15]:
import numpy as np
import pandas as pd
from sklearn import tree
from sklearn.metrics import confusion_matrix, classification_report, plot_confusion_matrix
import pydotplus
from IPython.display import Image

ab_df=pd.read_csv('abalone.data',delimiter=',',header=None)
ab_df.shape

(4177, 9)

In [16]:
ab_df.columns=['sex', 'length', 'diameter', 'height', 'whole_weight', 'shucked_weight', 'viscera_weight', 'shell_weight', 'rings']

In [17]:
df=ab_df[['sex', 'length', 'diameter', 'height', 'whole_weight', 'rings']]

In [26]:
outlier_rem_df=df.copy()
# IQR
Q1 = outlier_rem_df.quantile(0.25)
Q3 = outlier_rem_df.quantile(0.75)
IQR = Q3 - Q1
print(IQR)
mod_df_out = outlier_rem_df[~((outlier_rem_df < (Q1 - 1.5 * IQR)) |(outlier_rem_df > (Q3 + 1.5 * IQR))).any(axis=1)]
mod_df_out.shape

length          0.1650
diameter        0.1300
height          0.0500
whole_weight    0.7115
rings           3.0000
dtype: float64


(3821, 6)

In [27]:
mod_df_out['rings'].unique()

array([15,  7,  9, 10,  8, 14, 11, 12, 13,  5,  6,  4], dtype=int64)

In [28]:
aba_df=pd.get_dummies(mod_df_out)
aba_df

Unnamed: 0,length,diameter,height,whole_weight,rings,sex_F,sex_I,sex_M
0,0.455,0.365,0.095,0.5140,15,0,0,1
1,0.350,0.265,0.090,0.2255,7,0,0,1
2,0.530,0.420,0.135,0.6770,9,1,0,0
3,0.440,0.365,0.125,0.5160,10,0,0,1
4,0.330,0.255,0.080,0.2050,7,0,1,0
...,...,...,...,...,...,...,...,...
4172,0.565,0.450,0.165,0.8870,11,1,0,0
4173,0.590,0.440,0.135,0.9660,10,0,0,1
4174,0.600,0.475,0.205,1.1760,9,0,0,1
4175,0.625,0.485,0.150,1.0945,10,1,0,0


In [29]:
from sklearn.preprocessing import StandardScaler
from keras.layers import Dense
from keras.models import Sequential
from keras.optimizers import SGD
from sklearn.model_selection import train_test_split
from matplotlib import pyplot
from sklearn.metrics import mean_squared_error
from tensorflow.keras.callbacks import EarlyStopping

X= aba_df.drop('rings', axis=1)
y= aba_df['rings']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state=42)

scaler = StandardScaler()
scaler.fit_transform(X_train, y_train)

# define model
n_cols = X_train.shape[1]
input_shape = (n_cols,)

model = Sequential()
model.add(Dense(50, activation='relu',input_shape=input_shape))
model.add(Dense(50, activation='relu'))
model.add(Dense(1, activation='linear'))
# compile model
model.compile(loss='mean_squared_error', optimizer=SGD(learning_rate=0.01, momentum=0.9))
early_stopping = EarlyStopping(patience=3, monitor='val_loss')
# fit model
history = model.fit(X_train, y_train, callbacks=[early_stopping, history], validation_data=(X_test, y_test), epochs=100, verbose=0)
y_pred= model.predict(X_test)
# evaluate the model
train_mse = model.evaluate(X_train, y_train, verbose=0)
test_mse = model.evaluate(X_test, y_test, verbose=0)
print('Train: %.3f, Test: %.3f' % (train_mse, test_mse))
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
print('RMSE is ', rmse)

Train: 3.324, Test: 3.168
RMSE is  1.779959494990532


In [30]:
from sklearn.decomposition import TruncatedSVD

svd = TruncatedSVD(n_components=3)

X_train_svd=svd.fit_transform(X_train)
X_test_svd=svd.fit_transform(X_test)

# define model
n_cols = X_train_svd.shape[1]
input_shape = (n_cols,)

model = Sequential()
model.add(Dense(50, activation='relu',input_shape=input_shape))
model.add(Dense(50, activation='relu'))
model.add(Dense(1, activation='linear'))
# compile model
model.compile(loss='mean_squared_error', optimizer=SGD(learning_rate=0.01, momentum=0.9))
early_stopping = EarlyStopping(patience=3, monitor='val_loss')
# fit model
history = model.fit(X_train_svd, y_train, callbacks=[early_stopping, history], validation_data=(X_test_svd, y_test), epochs=100, verbose=0)
y_pred= model.predict(X_test_svd)
# evaluate the model
train_mse = model.evaluate(X_train_svd, y_train, verbose=0)
test_mse = model.evaluate(X_test_svd, y_test, verbose=0)
print('Train: %.3f, Test: %.3f' % (train_mse, test_mse))
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
print('RMSE is ', rmse)

Train: 3.396, Test: 3.357
RMSE is  1.832105427228917


In [31]:
from sklearn.decomposition import PCA

pca = PCA(n_components=3)

X_train_pca=pca.fit_transform(X_train)
X_test_pca=pca.fit_transform(X_test)

# define model
n_cols = X_train_pca.shape[1]
input_shape = (n_cols,)

model = Sequential()
model.add(Dense(50, activation='relu',input_shape=input_shape))
model.add(Dense(50, activation='relu'))
model.add(Dense(1, activation='linear'))
# compile model
model.compile(loss='mean_squared_error', optimizer=SGD(learning_rate=0.01, momentum=0.9))
early_stopping = EarlyStopping(patience=3, monitor='val_loss')
# fit model
history = model.fit(X_train_pca, y_train, callbacks=[early_stopping, history], validation_data=(X_test_pca, y_test), epochs=100, verbose=0)
y_pred= model.predict(X_test_pca)
# evaluate the model
train_mse = model.evaluate(X_train_pca, y_train, verbose=0)
test_mse = model.evaluate(X_test_pca, y_test, verbose=0)
print('Train: %.3f, Test: %.3f' % (train_mse, test_mse))
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
print('RMSE is ', rmse)

Train: 3.233, Test: 3.013
RMSE is  1.7357692640637896


In [32]:
from sklearn.manifold import Isomap

iso = Isomap(n_components=3)

X_train_iso=iso.fit_transform(X_train)
X_test_iso=iso.fit_transform(X_test)

# define model
n_cols = X_train_iso.shape[1]
input_shape = (n_cols,)

model = Sequential()
model.add(Dense(50, activation='relu',input_shape=input_shape))
model.add(Dense(50, activation='relu'))
model.add(Dense(1, activation='linear'))
# compile model
model.compile(loss='mean_squared_error', optimizer=SGD(learning_rate=0.01, momentum=0.9))
early_stopping = EarlyStopping(patience=3, monitor='val_loss')
# fit model
history = model.fit(X_train_iso, y_train, callbacks=[early_stopping, history], validation_data=(X_test_iso, y_test), epochs=100, verbose=0)
y_pred= model.predict(X_test_iso)
# evaluate the model
train_mse = model.evaluate(X_train_iso, y_train, verbose=0)
test_mse = model.evaluate(X_test_iso, y_test, verbose=0)
print('Train: %.3f, Test: %.3f' % (train_mse, test_mse))
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
print('RMSE is ', rmse)

Train: 3.408, Test: 4.094
RMSE is  2.023388892069943


Write a function that will indicate if an inputted IPv4 address is accurate or not.
IP addresses are valid if they have 4 values between 0 and 255 (inclusive), punctuated
by periods.

Input 1:
2.33.245.5

Output 1:
True

Input 2:
12.345.67.89

Output 2:
False

In [51]:
def ipv_check(ipv):
    if ipv.count(".") != 3:
        return "False"
    
    ipv_components= list(map(str, ipv.split(".")))
    print(ipv_components)
    
    for x in ipv_components:
        if int(x) < 0 or int(x) > 255:
            return "False"
        
    return "True"
    

In [52]:
ipv_check('2.33.245.5')

['2', '33', '245', '5']


'True'

In [53]:
ipv_check('12.345.67.89')

['12', '345', '67', '89']


'False'