<a href="https://colab.research.google.com/github/Minakshi654/Modelname/blob/main/Untitled11.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, accuracy_score, f1_score
from sklearn.preprocessing import MinMaxScaler

# Load the dataset
df = pd.read_csv('Intervals.csv', parse_dates=['Interval_start'], dayfirst=True)

# Ensure the data is sorted by date
df = df.sort_values('Interval_start')

# Normalize the data
scaler = MinMaxScaler(feature_range=(0, 1))
df['Max_total'] = scaler.fit_transform(df[['Max_total']])

# Prepare the data for Random Forest
def create_dataset(data, time_step=1):
    X, Y = [], []
    for i in range(len(data)-time_step-1):
        a = data[i:(i+time_step), 0]
        X.append(a)
        Y.append(data[i + time_step, 0])
    return np.array(X), np.array(Y)

# Split the data into training and testing sets
train_size = int(len(df) * 0.8)
train, test = df[:train_size], df[train_size:]

# Reshape the data
time_step = 30  # 30 minutes
X_train, y_train = create_dataset(train[['Max_total']].values, time_step)
X_test, y_test = create_dataset(test[['Max_total']].values, time_step)

# Build the Random Forest model
model = RandomForestRegressor(n_estimators=10)
model.fit(X_train, y_train)

# Make predictions
train_predict = model.predict(X_train)
test_predict = model.predict(X_test)

# Inverse transform to get actual values
train_predict = scaler.inverse_transform(train_predict.reshape(-1, 1))
y_train = scaler.inverse_transform(y_train.reshape(-1, 1))
test_predict = scaler.inverse_transform(test_predict.reshape(-1, 1))
y_test = scaler.inverse_transform(y_test.reshape(-1, 1))

# Calculate metrics
mse = mean_squared_error(y_test, test_predict)
rmse = np.sqrt(mse)
accuracy = accuracy_score(np.where(y_test >= 1, 'Overflow', 'Normal'), np.where(test_predict >= 1, 'Overflow', 'Normal'))
f1 = f1_score(np.where(y_test >= 1, 'Overflow', 'Normal'), np.where(test_predict >= 1, 'Overflow', 'Normal'), pos_label='Overflow')
correlation = np.corrcoef(y_test[:, 0], test_predict[:, 0])[0, 1]

# Print metrics
print(f'MSE: {mse}')
print(f'RMSE: {rmse}')
print(f'Accuracy: {accuracy * 100:.2f}%')
print(f'F1 Score: {f1}')
print(f'Correlation: {correlation}')




MSE: 0.00029112299381302953
RMSE: 0.01706232674089409
Accuracy: 99.99%
F1 Score: 0.5142857142857143
Correlation: 0.987341431349786


ValueError: Length of values (120005) does not match length of index (120036)

In [6]:
# Add status column to test set
test = test.reset_index(drop=True)
test['Predicted'] = pd.Series(test_predict.flatten()) # Convert test_predict to a Series to ensure matching length
test['Status'] = np.where(test['Predicted'] >= 1, 'Overflow', 'Normal')
num_overflows = (test['Predicted'] >= 1).sum()
print(f'Number of overflows: {num_overflows}')
# Save the results
test.to_csv('Predicted_Intervals.csv', index=False)

Number of overflows: 14


In [None]:
import pandas as pd
import numpy as np
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_squared_error, accuracy_score, f1_score
from sklearn.preprocessing import LabelEncoder

# Load the dataset
df = pd.read_csv('Intervals.csv', parse_dates=['Interval_start'], dayfirst=True)

# Ensure the data is sorted by date
df = df.sort_values('Interval_start')

# Split the data into training and testing sets
train_size = int(len(df) * 0.8)
train, test = df[:train_size], df[train_size:]

# Fit the ARIMA model
model = ARIMA(train['Max_total'], order=(5,1,0))
model_fit = model.fit()

# Make predictions
predictions = model_fit.forecast(steps=len(test))
test['Predicted'] = predictions

# Add status column
test['Status'] = np.where(test['Predicted'] >= 1, 'Overflow', 'Normal')

# Calculate metrics
mse = mean_squared_error(test['Max_total'], test['Predicted'])
rmse = np.sqrt(mse)
accuracy = accuracy_score(test['Status'], np.where(test['Max_total'] >= 1, 'Overflow', 'Normal'))
f1 = f1_score(test['Status'], np.where(test['Max_total'] >= 1, 'Overflow', 'Normal'), pos_label='Overflow')
correlation = test[['Max_total', 'Predicted']].corr().iloc[0, 1]

# Print metrics
print(f'MSE: {mse}')
print(f'RMSE: {rmse}')
print(f'Accuracy: {accuracy * 100:.2f}%')
print(f'F1 Score: {f1}')
print(f'Correlation: {correlation}')

# Save the results
test.to_csv('Predicted_Intervals.csv', index=False)


In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, accuracy_score, f1_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
import matplotlib.pyplot as plt

# Load the dataset
df = pd.read_csv('Intervals.csv', parse_dates=['Interval_start'], dayfirst=True)

# Ensure the data is sorted by date
df = df.sort_values('Interval_start')

# Normalize the data
scaler = MinMaxScaler(feature_range=(0, 1))
df['Max_total'] = scaler.fit_transform(df[['Max_total']])

# Prepare the data for LSTM
def create_dataset(data, time_step=1):
    X, Y = [], []
    for i in range(len(data)-time_step-1):
        a = data[i:(i+time_step), 0]
        X.append(a)
        Y.append(data[i + time_step, 0])
    return np.array(X), np.array(Y)

# Split the data into training and testing sets
train_size = int(len(df) * 0.8)
train, test = df[:train_size], df[train_size:]

# Reshape the data
time_step = 30  # 30 minutes
X_train, y_train = create_dataset(train[['Max_total']].values, time_step)
X_test, y_test = create_dataset(test[['Max_total']].values, time_step)

# Reshape input to be [samples, time steps, features] which is required for LSTM
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)

# Build the LSTM model
model = Sequential()
model.add(LSTM(50, return_sequences=True, input_shape=(time_step, 1)))
model.add(LSTM(50, return_sequences=False))
model.add(Dense(1))
model.compile(optimizer='adam', loss='mean_squared_error')

# Train the model
model.fit(X_train, y_train, epochs=10, batch_size=64, verbose=1)

# Make predictions for the next year
future_steps = 365 * 24 * 2  # Assuming 30-second intervals for a year
last_data = df['Max_total'].values[-time_step:]
future_predictions = []

for _ in range(future_steps):
    last_data_reshaped = last_data.reshape(1, -1, 1)
    next_pred = model.predict(last_data_reshaped)
    future_predictions.append(next_pred[0, 0])
    last_data = np.append(last_data[1:], next_pred)

# Inverse transform to get actual values
future_predictions = scaler.inverse_transform(np.array(future_predictions).reshape(-1, 1))

# Create a DataFrame for future predictions
future_dates = pd.date_range(start=df['Interval_start'].iloc[-1], periods=future_steps, freq='30S')
future_df = pd.DataFrame({'Interval_start': future_dates, 'Predicted': future_predictions.flatten()})

# Add status column
future_df['Status'] = np.where(future_df['Predicted'] >= 1, 'Overflow', 'Normal')

# Calculate metrics on the test set
test_predict = model.predict(X_test)
test_predict = scaler.inverse_transform(test_predict)
y_test = scaler.inverse_transform(y_test.reshape(-1, 1))

mse = mean_squared_error(y_test, test_predict)
rmse = np.sqrt(mse)
accuracy = accuracy_score(np.where(y_test >= 1, 'Overflow', 'Normal'), np.where(test_predict >= 1, 'Overflow', 'Normal'))
f1 = f1_score(np.where(y_test >= 1, 'Overflow', 'Normal'), np.where(test_predict >= 1, 'Overflow', 'Normal'), pos_label='Overflow')
correlation = np.corrcoef(y_test[:, 0], test_predict[:, 0])[0, 1]

# Print metrics
print(f'MSE: {mse}')
print(f'RMSE: {rmse}')
print(f'Accuracy: {accuracy * 100:.2f}%')
print(f'F1 Score: {f1}')
print(f'Correlation: {correlation}')

# Save the results
future_df.to_csv('Predicted_Intervals_2024.csv', index=False)

# Plot the predictions
plt.figure(figsize=(14, 7))
plt.plot(future_df['Interval_start'], future_df['Predicted'], label='Predicted')
plt.axhline(y=1, color='r', linestyle='--', label='Overflow Threshold')
plt.xlabel('Date')
plt.ylabel('Max_total')
plt.title('Forecasted Max_total for 2024')
plt.legend()
plt.show()


  super().__init__(**kwargs)


Epoch 1/10
[1m7502/7502[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m283s[0m 37ms/step - loss: 3.6226e-04
Epoch 2/10
[1m7502/7502[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m326s[0m 38ms/step - loss: 1.3912e-04
Epoch 3/10
[1m7502/7502[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m279s[0m 37ms/step - loss: 1.3041e-04
Epoch 4/10
[1m7502/7502[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m325s[0m 38ms/step - loss: 1.2376e-04
Epoch 5/10
[1m7502/7502[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m319s[0m 37ms/step - loss: 1.1753e-04
Epoch 6/10
[1m7502/7502[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m282s[0m 38ms/step - loss: 1.1404e-04
Epoch 7/10
[1m7502/7502[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m319s[0m 37ms/step - loss: 1.0563e-04
Epoch 8/10
[1m4777/7502[0m [32m━━━━━━━━━━━━[0m[37m━━━━━━━━[0m [1m1:40[0m 37ms/step - loss: 9.5876e-05