In [6]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import set_random_seed

# Location of the Excel workbook that holds the dataset.
# NOTE(review): this is a hard-coded absolute path (looks like a Colab-style
# location) — confirm, and consider making it configurable.
file_path = '/content/CancerFactorsData.xlsx'
# Read the workbook into a DataFrame; with no sheet_name given, pandas loads
# the first sheet.
df = pd.read_excel(file_path)

In [7]:
# Columns fed to the model as inputs, and the column the model predicts.
features = ['Smoking', 'Obesity', 'Poverty', 'Uninsured', 'PM2.5']
target = 'LungCancerRates'

# Order the rows by index.
# NOTE(review): this only yields chronological order if the index encodes
# time — not visible from here, confirm against the workbook.
df = df.sort_index()

# Fit one min-max scaler for the inputs and a separate one for the target, so
# predictions can later be mapped back to original units with
# scaler_target.inverse_transform.
# NOTE(review): both scalers are fit on every row, before any train/test
# split happens, so held-out rows contribute to the min/max used for scaling.
scaler_features = MinMaxScaler().fit(df[features])
scaler_target = MinMaxScaler().fit(df[[target]])

# Overwrite the raw columns with their scaled values.
# NOTE(review): because df is overwritten in place, re-running this cell
# without reloading df refits the scalers on already-scaled data, after which
# inverse_transform no longer returns original units.
df[features] = scaler_features.transform(df[features])
df[[target]] = scaler_target.transform(df[[target]])

# Creating input sequences for LSTM
def create_sequences(df, features, target, sequence_length=10):
    """Build sliding-window samples for a sequence model.

    Window ``i`` holds the feature rows ``i .. i + sequence_length - 1`` and is
    paired with the target value found at row ``i + sequence_length`` (the row
    immediately after the window). Rows are taken positionally, in the order
    they appear in ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Source frame containing the feature and target columns.
    features : list of str
        Column labels used as model inputs.
    target : str
        Column label of the value to predict.
    sequence_length : int, default 10
        Number of consecutive rows in each window. Must be at least 1.

    Returns
    -------
    X : numpy.ndarray
        Array of shape ``(n_windows, sequence_length, n_features)``.
    y : numpy.ndarray
        Array of shape ``(n_windows,)``.
        ``n_windows`` is ``max(len(df) - sequence_length, 0)``. When it is 0
        the arrays are empty but still carry the trailing dimensions above.

    Raises
    ------
    ValueError
        If ``sequence_length`` is smaller than 1.
    """
    if sequence_length < 1:
        raise ValueError(
            f"sequence_length must be a positive integer, got {sequence_length}"
        )

    # Pull the columns out once; selecting df[features] inside the loop would
    # copy the whole column subset on every iteration.
    feature_values = df[features].to_numpy()
    target_values = df[target].to_numpy()

    n_windows = max(len(df) - sequence_length, 0)
    if n_windows == 0:
        # Not enough rows for a single (window, next-row target) pair. Return
        # correctly shaped empties rather than a shapeless (0,) array.
        empty_X = np.empty(
            (0, sequence_length) + feature_values.shape[1:],
            dtype=feature_values.dtype,
        )
        empty_y = np.empty((0,), dtype=target_values.dtype)
        return empty_X, empty_y

    X = np.stack(
        [feature_values[start:start + sequence_length] for start in range(n_windows)]
    )
    # The target of window i sits at row i + sequence_length, so the targets
    # are simply the column with its first sequence_length entries dropped.
    # Copy so the result does not alias the frame's underlying buffer.
    y = target_values[sequence_length:].copy()
    return X, y

# Number of consecutive rows in each input window.
sequence_length = 10
X, y = create_sequences(df, features, target, sequence_length)

# Split chronologically: the first 80% of windows train the model and the last
# 20% are held out. Consecutive windows overlap in all but one row, so a
# shuffled split would place near-copies of training windows in the test set
# and inflate the reported scores.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

# Seed the Python, NumPy and TensorFlow random generators before any layer is
# created, so weight initialisation, dropout masks and batch shuffling repeat
# across Restart & Run All. (Some GPU kernels may still be non-deterministic.)
set_random_seed(42)

# Stacked LSTM regressor:
#   - the first LSTM returns the full sequence so the second LSTM can consume it
#   - dropout after each recurrent layer for regularisation
#   - a single linear unit produces the (scaled) regression output
model = Sequential([
    LSTM(64, input_shape=(sequence_length, len(features)), return_sequences=True),
    Dropout(0.2),
    LSTM(32),
    Dropout(0.2),
    Dense(1),
])

# MSE is the training objective; MAE is tracked as an additional metric.
model.compile(optimizer=Adam(learning_rate=0.001), loss='mean_squared_error', metrics=['mae'])

# Fit for 100 epochs in mini-batches of 32. The test arrays are passed as
# validation data for per-epoch monitoring only: no callback is supplied, so
# nothing is selected or stopped based on them.
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=100,
    verbose=1,
    validation_data=(X_test, y_test),
)

# Score the trained model on the test arrays. The result unpacks into the
# compiled loss followed by the compiled 'mae' metric; both are computed on
# the scaled target. `mae` is rebound further down by the MAE computed with
# scikit-learn.
loss, mae = model.evaluate(x=X_test, y=y_test)

# Predict on the test windows; the network emits one value per window.
predictions = model.predict(X_test)

# Undo the target scaling so the metrics are reported in the target's original
# units. inverse_transform expects a 2-D array, hence the reshape of y_test.
y_test_original = scaler_target.inverse_transform(y_test.reshape(-1, 1))
predictions_original = scaler_target.inverse_transform(predictions)

# Regression metrics on the unscaled values.
mae = mean_absolute_error(y_test_original, predictions_original)
mse = mean_squared_error(y_test_original, predictions_original)
r2 = r2_score(y_test_original, predictions_original)

print(f"Mean Absolute Error (MAE): {mae}")
print(f"Mean Squared Error (MSE): {mse}")
print(f"R-squared (R²): {r2}")

# Show up to the first 5 predicted/actual pairs. Slicing (instead of indexing
# 0..4 directly) keeps this from raising IndexError when the test set holds
# fewer than 5 samples.
print("Sample predictions and actual values:")
for predicted, actual in zip(predictions_original[:5, 0], y_test_original[:5, 0]):
    print(f"Predicted: {predicted}, Actual: {actual}")

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78