<a href="https://colab.research.google.com/github/AryaN7643/AI-/blob/main/Regression_with_Keras.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Use case: Build an ANN model that predicts the Power Energy (PE) output of a power plant. Independent features: sensor readings from the power plant's machines

In [25]:
# Suppress every Python warning for the rest of the session so cell outputs stay clean.
from warnings import filterwarnings
filterwarnings('ignore')

In [26]:
import pandas as pd

# Location of the raw power-plant CSV; read directly over HTTPS.
DATA_URL = r'https://raw.githubusercontent.com/sindhura-nk/Datasets/refs/heads/main/PowerPlant.csv'

df = pd.read_csv(DATA_URL)
# Preview the first rows as the cell output.
df.head()

Unnamed: 0,AT,V,AP,RH,PE
0,8.34,40.77,1010.84,90.01,480.48
1,23.64,58.49,1011.4,74.2,445.75
2,29.74,56.9,1007.15,41.91,438.76
3,19.07,49.69,1007.22,76.79,453.09
4,11.8,40.66,1017.13,97.2,464.43


  AT: Atmospheric Temperature
  V: Vacuum
  AP: Atmospheric Pressure
  RH: Relative Humidity
  PE: Power Energy

### Perform basic data quality checks

In [27]:
# Column dtypes, non-null counts and memory footprint of the loaded frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9568 entries, 0 to 9567
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   AT      9568 non-null   float64
 1   V       9568 non-null   float64
 2   AP      9568 non-null   float64
 3   RH      9568 non-null   float64
 4   PE      9568 non-null   float64
dtypes: float64(5)
memory usage: 373.9 KB


In [28]:
# Number of missing values in each column.
df.isna().sum()

Unnamed: 0,0
AT,0
V,0
AP,0
RH,0
PE,0


In [29]:
# Number of rows that are exact duplicates of an earlier row.
df.duplicated().sum()

np.int64(41)

In [30]:
# Remove the duplicated rows; `df` is rebound to the de-duplicated frame
# (the original row index labels are kept, not renumbered).
df=df.drop_duplicates()

## Separate data into X and Y features

In [31]:
# Target: PE, kept as a one-column DataFrame (not a Series).
Y = df.loc[:, ["PE"]]
# Features: every remaining column.
X = df.drop('PE', axis='columns')

In [32]:
# Preview the first rows of the feature frame.
X.head()

Unnamed: 0,AT,V,AP,RH
0,8.34,40.77,1010.84,90.01
1,23.64,58.49,1011.4,74.2
2,29.74,56.9,1007.15,41.91
3,19.07,49.69,1007.22,76.79
4,11.8,40.66,1017.13,97.2


In [33]:
# Preview the first rows of the target frame.
Y.head()

Unnamed: 0,PE
0,480.48
1,445.75
2,438.76
3,453.09
4,464.43


## Data Preprocessing and Data cleaning

In [34]:
from sklearn.pipeline import make_pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler

In [35]:
# Numeric preprocessing: mean-impute missing values, then standardise each column.
num_pipe = make_pipeline(
    SimpleImputer(strategy='mean'),
    StandardScaler(),
)
# Ask the pipeline to return pandas DataFrames instead of NumPy arrays.
num_pipe = num_pipe.set_output(transform='pandas')

In [36]:
# Display the pipeline object as the cell output.
num_pipe

In [37]:
# Fit the imputer + scaler on every row of X and return the transformed frame.
X_pre = num_pipe.fit_transform(X)
# Preview the first scaled rows.
X_pre.head()

Unnamed: 0,AT,V,AP,RH
0,-1.520448,-1.066041,-0.403535,1.141599
1,0.534897,0.330813,-0.309262,0.059223
2,1.354348,0.205475,-1.024725,-2.1514
3,-0.07902,-0.362884,-1.012941,0.236538
4,-1.055645,-1.074713,0.655349,1.633837


## Train test split

In [38]:
from sklearn.model_selection import train_test_split

# Split the RAW features first. Fitting the imputer/scaler on all rows and
# splitting afterwards would let test-set means and variances leak into the
# training features. The fixed random_state keeps the split reproducible.
xtrain_raw,xtest_raw,ytrain,ytest = train_test_split(X,Y,train_size=0.85,random_state=21)

# Learn the preprocessing statistics from the training rows only ...
xtrain = num_pipe.fit_transform(xtrain_raw)
# ... and apply those same statistics, without refitting, to the held-out rows.
xtest = num_pipe.transform(xtest_raw)

In [39]:
# Print the shapes of the four split parts on one line.
print(*(part.shape for part in (xtrain, xtest, ytrain, ytest)))

(8097, 4) (1430, 4) (8097, 1) (1430, 1)


## Artificial Neural Network Model Building

In [40]:
from keras.models import Sequential
from keras.layers import Input,Dense


In [41]:
# (rows, columns) of the training feature frame.
xtrain.shape

(8097, 4)

In [42]:
# Number of feature columns; this is the width passed to the network's Input layer.
xtrain.shape[1]

4

In [43]:
# Seed Python, NumPy and the Keras backend before any layer is created so the
# initial weights (and therefore the training run) are repeatable after a
# kernel restart. 21 matches the random_state used for the train/test split.
from keras.utils import set_random_seed
set_random_seed(21)

# Small fully connected regressor: features -> 8 -> 4 -> 1.
model = Sequential([
    # Input layer: one value per feature column of xtrain
    Input(shape=(xtrain.shape[1],)),
    # Hidden layer 1
    Dense(units=8,activation='relu'),
    # Hidden layer 2
    Dense(units=4,activation='relu'),
    # Output layer: a single linear unit for the continuous target
    Dense(units=1,activation='linear'),
])

In [44]:
# Print the layer-by-layer summary of the network.
model.summary()

In [45]:
# Train with Adam on mean squared error; also report mean absolute error per epoch.
model.compile(
    loss='mean_squared_error',
    optimizer='adam',
    metrics=['mae'],
)

In [None]:
# Fit for 40 epochs, holding out 20% of the training rows for validation.
# The returned History object is kept in `nn` for later use.
nn = model.fit(
    x=xtrain,
    y=ytrain,
    epochs=40,
    validation_split=0.2,
)

Epoch 1/40
[1m203/203[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 8ms/step - loss: 205961.3438 - mae: 453.5265 - val_loss: 203809.8438 - val_mae: 451.2063
Epoch 2/40
[1m203/203[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - loss: 200918.6406 - mae: 448.0238 - val_loss: 190031.3125 - val_mae: 435.8123
Epoch 3/40
[1m203/203[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - loss: 182653.5469 - mae: 427.1012 - val_loss: 154203.8594 - val_mae: 391.5028
Epoch 4/40
[1m203/203[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 142383.8438 - mae: 374.7852 - val_loss: 105885.8672 - val_mae: 317.2727
Epoch 5/40
[1m203/203[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 95933.5938 - mae: 297.9629 - val_loss: 66052.6250 - val_mae: 234.9305
Epoch 6/40
[1m203/203[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 59542.6914 - mae: 221.7321 - val_loss: 42535.6758 - val_mae: 183.0164
Epoch 7/

In [None]:
# Learning curve: training vs. validation loss for each epoch of the fit.
import matplotlib.pyplot as plt

fig, ax = plt.subplots()
for history_key, curve_label in (('loss', 'Training Loss'), ('val_loss', 'Validation Loss')):
    ax.plot(nn.history[history_key], label=curve_label)
ax.set_xlabel("Number of Epochs")
ax.set_ylabel("Mean Squared Error")
ax.legend()
ax.set_title("Learning Curve")
plt.show()