In [70]:
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras import regularizers
from sklearn.metrics import mean_absolute_error
from tensorflow.keras.layers import Dense, BatchNormalization


In [71]:
data = pd.read_csv('/content/81-90.csv')

In [72]:
from sklearn.model_selection import train_test_split


X = data[['cit_2017', 'cit_2018', 'cit_2019', 'cit_2020', 'cit_2021']]
y = data['cit_2022']


X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

X_train.shape, X_test.shape


((80, 5), (20, 5))

In [73]:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


In [74]:
learning_rates = [0.1, 0.01, 0.001, 0.0001]
result = {}
prediction_array = {}
for learning_rate in learning_rates:
    print(f"Training with learning rate: {learning_rate}")
    model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(5,)),# Input layer with 5 features
    tf.keras.layers.Dense(3, activation='relu'),  # Hidden layer with 3 neurons and ReLU activation
    tf.keras.layers.Dense(1)              # Output layer with 1 neuron (for predicting cit_2022)
    ])

    optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)

    # Compile the model with the custom optimizer
    model.compile(optimizer=optimizer, loss='mean_absolute_error')
    early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
    history = model.fit(X_train, y_train, epochs=1000, batch_size=32,
                    validation_data=(X_test, y_test), verbose=1, callbacks=[early_stop])

    predictions = model.predict(X_test)
    prediction_array[learning_rate] = predictions
    mae_1 = mean_absolute_error(y_test, predictions)
    result[learning_rate]=mae_1
    print(f"Avg difference with learning rate {learning_rate}: {mae_1}")




[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Epoch 260/1000
Epoch 261/1000
Epoch 262/1000
Epoch 263/1000
Epoch 264/1000
Epoch 265/1000
Epoch 266/1000
Epoch 267/1000
Epoch 268/1000
Epoch 269/1000
Epoch 270/1000
Epoch 271/1000
Epoch 272/1000
Epoch 273/1000
Epoch 274/1000
Epoch 275/1000
Epoch 276/1000
Epoch 277/1000
Epoch 278/1000
Epoch 279/1000
Epoch 280/1000
Epoch 281/1000
Epoch 282/1000
Epoch 283/1000
Epoch 284/1000
Epoch 285/1000
Epoch 286/1000
Epoch 287/1000
Epoch 288/1000
Epoch 289/1000
Epoch 290/1000
Epoch 291/1000
Epoch 292/1000
Epoch 293/1000
Epoch 294/1000
Epoch 295/1000
Epoch 296/1000
Epoch 297/1000
Epoch 298/1000
Epoch 299/1000
Epoch 300/1000
Epoch 301/1000
Epoch 302/1000
Epoch 303/1000
Epoch 304/1000
Epoch 305/1000
Epoch 306/1000
Epoch 307/1000
Epoch 308/1000
Epoch 309/1000
Epoch 310/1000
Epoch 311/1000
Epoch 312/1000
Epoch 313/1000
Epoch 314/1000
Epoch 315/1000
Epoch 316/1000
Epoch 317/1000
Epoch 318/1000
Epoch 319/1000
Epoch 320/1000
Epoch 321/1000
Epoch

###Results

In [75]:
for key in result:
      print("MAE for learning rating ",key, "->", result[key])

MAE for learning rating  0.1 -> 53.81144094467163
MAE for learning rating  0.01 -> 56.97324771881104
MAE for learning rating  0.001 -> 281.7354696273804
MAE for learning rating  0.0001 -> 337.4034264579415


In [77]:
print(prediction_array)

{0.1: array([[421.51794 ],
       [340.8882  ],
       [676.57025 ],
       [176.66997 ],
       [ 53.6513  ],
       [306.71863 ],
       [420.867   ],
       [ 94.21242 ],
       [193.91725 ],
       [296.33847 ],
       [589.3333  ],
       [370.29465 ],
       [ 22.109575],
       [443.53845 ],
       [ 61.56844 ],
       [441.17783 ],
       [313.89084 ],
       [785.94604 ],
       [242.45586 ],
       [146.26924 ]], dtype=float32), 0.01: array([[403.86148  ],
       [339.03836  ],
       [634.2596   ],
       [180.70296  ],
       [ 69.756645 ],
       [293.87784  ],
       [422.86542  ],
       [108.18387  ],
       [190.17952  ],
       [300.78506  ],
       [531.4589   ],
       [359.65515  ],
       [ 10.4374275],
       [426.20697  ],
       [ 75.764046 ],
       [435.31912  ],
       [312.05035  ],
       [703.6369   ],
       [242.0471   ],
       [149.31876  ]], dtype=float32), 0.001: array([[ 37.145004],
       [ 94.6307  ],
       [109.04602 ],
       [ 58.831547],
   