## Creating an ANN regression model to predict Profit
###### Data set - 50_startups

In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [2]:
# Read the startups data from the project-relative DataSets folder
# and preview the first two rows.
df = pd.read_csv(filepath_or_buffer='DataSets/50_Startups.csv')
df.head(n=2)

Unnamed: 0,R&D Spend,Administration,Marketing Spend,State,Profit
0,165349.2,136897.8,471784.1,New York,192261.83
1,162597.7,151377.59,443898.53,California,191792.06


#### Data - preprocessing:

In [3]:
# Total number of missing cells in the whole frame.
# `.isnull().sum()` gives the NaN count per column and the second `.sum()`
# adds those up; `.isnull().all().sum()` would only count columns that are
# *entirely* null and so would report 0 for a partially missing column.
print(df.isnull().sum().sum()) # any null values?

0


In [4]:
# How many rows each state contributes, followed by the distinct labels.
print(df['State'].value_counts())
df['State'].unique()

New York      17
California    17
Florida       16
Name: State, dtype: int64


array(['New York', 'California', 'Florida'], dtype=object)

#### Encoding categorical data - State

In [5]:
from sklearn.preprocessing import LabelEncoder

# Swap the State strings for integer codes, overwriting the column in place.
# `le` keeps the fitted label <-> code mapping for later use.
# NOTE(review): integer codes impose an ordering on the states that the
# network will treat as numeric; one-hot encoding may be a better fit -
# confirm before relying on the State feature.
le = LabelEncoder()
le.fit(df['State'])
df['State'] = le.transform(df['State'])
df.head(n=2)

Unnamed: 0,R&D Spend,Administration,Marketing Spend,State,Profit
0,165349.2,136897.8,471784.1,2,192261.83
1,162597.7,151377.59,443898.53,0,191792.06


#### Splitting data:

In [6]:
# Splitting into x and y by column position:
#   x -> first four columns: R&D Spend, Administration, Marketing Spend, State
#   y -> fifth column: Profit (one-column slice, so it stays a DataFrame)

x, y = df.iloc[:, :4], df.iloc[:, 4:5]
print(x.head(2))
print(y.iloc[:2])

   R&D Spend  Administration  Marketing Spend  State
0   165349.2       136897.80        471784.10      2
1   162597.7       151377.59        443898.53      0
      Profit
0  192261.83
1  191792.06


In [7]:
# Hold out 20% of the rows for testing; the fixed random_state keeps the
# split identical between runs.

from sklearn.model_selection import train_test_split

xtrain, xtest, ytrain, ytest = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=10,
)

# Shapes of the four pieces, printed on one line.
print(*(part.shape for part in (xtrain, xtest, ytrain, ytest)))

(40, 4) (10, 4) (40, 1) (10, 1)


#### Creating ANN-Regression Model

###### Reference (choosing activation functions): https://www.analyticsvidhya.com/blog/2020/01/fundamentals-deep-learning-activation-functions-when-to-use-them/

In [8]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense



In [9]:
# Seed TensorFlow's global random generator before any layer is created so
# that weight initialisation (and the shuffling done during training) is
# repeatable on a fresh Restart & Run All. The value mirrors the
# random_state used for the train/test split.
import tensorflow as tf

tf.random.set_seed(10)

# Fully connected regression network: three ReLU hidden layers of
# 4, 27 and 81 units, followed by a single linear output unit that
# produces the Profit estimate. No input shape is declared here.
model = Sequential()

model.add(Dense(4, activation='relu'))
model.add(Dense(27, activation='relu'))
model.add(Dense(81, activation='relu'))
model.add(Dense(1, activation='linear'))

In [10]:
# Mean squared error as the regression loss, minimised with Adam.
model.compile(
    loss='mse',
    optimizer='adam',
)

In [11]:
# Train on the 80% split: 13 passes over the data in mini-batches of 4 rows.
model.fit(
    x=xtrain,
    y=ytrain,
    epochs=13,
    batch_size=4,
)

Epoch 1/13
Epoch 2/13
Epoch 3/13
Epoch 4/13
Epoch 5/13
Epoch 6/13
Epoch 7/13
Epoch 8/13
Epoch 9/13
Epoch 10/13
Epoch 11/13
Epoch 12/13
Epoch 13/13


<keras.callbacks.History at 0x209924e0460>

In [12]:
# Persist the trained network to an HDF5 file under the Models folder.
model.save(filepath='Models/costprediction.h5')

In [13]:
# Predicted Profit for every row of the held-out test features.
ypred = model.predict(x=xtest)

In [14]:
# Side-by-side table of actual vs. predicted Profit for the held-out rows.
# `assign` returns a new frame, so `ytest` is never modified (wrapping it in
# pd.DataFrame(ytest) does not guarantee a copy, and adding a column to the
# result can write through to `ytest` or raise SettingWithCopyWarning).
# `ravel()` flattens the 2-D prediction array into a plain 1-D column.
comp = ytest.assign(ypred=ypred.ravel())
comp


Unnamed: 0,Profit,ypred
37,89949.14,64519.761719
23,108733.99,107051.65625
44,65200.33,62816.800781
42,71498.49,70996.429688
47,42559.73,41272.117188
20,118474.03,116691.289062
3,182901.99,164640.90625
30,99937.59,82260.328125
7,155752.6,170428.0625
6,156122.51,131612.828125


In [15]:
# Predict Profit for one new startup.
# Feature order must match the training columns:
#   R&D Spend, Administration, Marketing Spend, State (encoded; 2 = New York
#   per the encoded preview above).
# The sample is passed as an explicit 2-D NumPy array of shape (1, 4): Keras
# documents a bare Python list as "a list of arrays" for multi-input models,
# so a nested list is ambiguous across versions.

model.predict(np.array([[165349.2, 136897.80, 471784.10, 2]]))

array([[189548.22]], dtype=float32)