<a href="https://colab.research.google.com/github/Minakshi654/Modelname/blob/main/RFPredictions.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, accuracy_score, f1_score

# Random-forest forecast of 'Max_total' from its own lagged values (plus any
# other columns present in the CSV), followed by an Overflow/Normal
# classification of the predictions and a correlation report.

# Values at or above this threshold are labelled 'Overflow'. Kept in one place
# so predicted and actual labels are always derived with the same cut-off.
OVERFLOW_THRESHOLD = 1

# Load the dataset; 'Interval_start' is parsed as a day-first date column.
df = pd.read_csv('Intervals.csv', parse_dates=['Interval_start'], dayfirst=True)

# Prepare the data: index the rows by their interval start
df.set_index('Interval_start', inplace=True)

# Create lag features: lag_k holds the 'Max_total' value from k rows earlier.
# NOTE(review): shift() and the positional split below work on row order, so
# this assumes the CSV rows are already in chronological order - confirm.
for lag in range(1, 6):
    df[f'lag_{lag}'] = df['Max_total'].shift(lag)

# Drop rows with NaN values (the leading rows have no complete lag history)
df.dropna(inplace=True)

# Split the data into training and test sets: first 80% of rows vs. the rest
train_size = int(len(df) * 0.8)
train = df.iloc[:train_size]
# Take an explicit copy of the test slice: result columns are added to it
# below, and writing into a bare slice of df raises SettingWithCopyWarning and
# is not guaranteed to behave the same across pandas versions.
test = df.iloc[train_size:].copy()

# Separate features and target variable.
# NOTE(review): every column other than 'Max_total' is used as a feature, not
# only the lag_* columns - verify no other column leaks the target.
X_train, y_train = train.drop(columns=['Max_total']), train['Max_total']
X_test, y_test = test.drop(columns=['Max_total']), test['Max_total']

# Fit the Random Forest model (fixed seed for reproducible results)
model = RandomForestRegressor(n_estimators=50, random_state=42)
model.fit(X_train, y_train)

# Make predictions
predictions = model.predict(X_test)
test['Predicted'] = predictions

# Handle NaN values in 'Predicted' column.
# Assign the result back instead of calling fillna(inplace=True) on the column
# selection: the chained in-place form operates on a temporary object and does
# not update `test` when pandas Copy-on-Write is active.
test['Predicted'] = test['Predicted'].fillna(0)

# Calculate performance metrics
mse = mean_squared_error(y_test, predictions)
rmse = np.sqrt(mse)

# Add status column derived from the predicted value
test['Status'] = np.where(test['Predicted'] >= OVERFLOW_THRESHOLD, 'Overflow', 'Normal')

# Calculate accuracy and F1 score against the status derived from the actual value
test['Actual_Status'] = np.where(test['Max_total'] >= OVERFLOW_THRESHOLD, 'Overflow', 'Normal')
accuracy = accuracy_score(test['Actual_Status'], test['Status'])
f1 = f1_score(test['Actual_Status'], test['Status'], pos_label='Overflow')

# Print results
print(f'MSE: {mse}')
print(f'RMSE: {rmse}')
print(f'Accuracy: {accuracy * 100:.2f}%')
print(f'F1 Score: {f1}')

# Print count of overflow (0 when no row was predicted as 'Overflow')
overflow_count = test['Status'].value_counts().get('Overflow', 0)
print(f'Overflow count: {overflow_count}')

# Save the results to a new CSV file
test.to_csv('Predicted_Intervals.csv')

# Calculate correlations between the columns of df (target, lags, other features)
correlations = df.corr()
print(correlations)
