<a href="https://colab.research.google.com/github/RemyaVKarthikeyan/AA-Stagecoach-Project/blob/main/Linear_Regression_prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

# Load the data
route_EWT_A = pd.read_csv('/content/route_EWT_A.csv')
route_EWT_B = pd.read_csv('/content/route_EWT_B.csv')

# Replace '-' with NaN and drop rows with NaN values
route_EWT_A.replace('-', np.nan, inplace=True)
route_EWT_B.replace('-', np.nan, inplace=True)
route_EWT_A.dropna(inplace=True)
route_EWT_B.dropna(inplace=True)

# Convert columns to appropriate data types
route_EWT_A['AWT'] = route_EWT_A['AWT'].astype(float)
route_EWT_A['EWT'] = route_EWT_A['EWT'].astype(float)
route_EWT_B['AWT'] = route_EWT_B['AWT'].astype(float)
route_EWT_B['EWT'] = route_EWT_B['EWT'].astype(float)

# Function to create features
def create_features(df):
    df['Time'] = pd.to_datetime(df['Time'], format='%H:%M')
    df['Hour'] = df['Time'].dt.hour
    df['Minute'] = df['Time'].dt.minute

    # Create lag features for AWT and EWT
    df['AWT_lag1'] = df['AWT'].shift(1)
    df['EWT_lag1'] = df['EWT'].shift(1)

    # Rolling average features
    df['AWT_roll_mean'] = df['AWT'].rolling(window=4).mean()
    df['EWT_roll_mean'] = df['EWT'].rolling(window=4).mean()

    # Drop rows with NaN values created by shifting
    df = df.dropna()

    return df

# Create features for both datasets
route_EWT_A = create_features(route_EWT_A)
route_EWT_B = create_features(route_EWT_B)

# Define the features and target
features = ['Hour', 'Minute', 'SWT', 'AWT_lag1', 'EWT_lag1', 'AWT_roll_mean', 'EWT_roll_mean']
target_awt = 'AWT'
target_ewt = 'EWT'

# Split the data into training and testing sets
X_train, X_test, y_train_awt, y_test_awt = train_test_split(route_EWT_A[features], route_EWT_A[target_awt], test_size=0.2, random_state=42)
X_train, X_test, y_train_ewt, y_test_ewt = train_test_split(route_EWT_A[features], route_EWT_A[target_ewt], test_size=0.2, random_state=42)

# Train the models
model_awt = LinearRegression()
model_awt.fit(X_train, y_train_awt)

model_ewt = LinearRegression()
model_ewt.fit(X_train, y_train_ewt)

# Define the SWT values for the next two hours
swt_hr1 = 5.80
swt_hr2 = 5.90

# Create a DataFrame for the next two hours
next_hours = pd.DataFrame({
    'Hour': [route_EWT_A['Hour'].max() + 1, route_EWT_A['Hour'].max() + 2],
    'Minute': [0, 0],
    'SWT': [swt_hr1, swt_hr2],
    'AWT_lag1': [route_EWT_A['AWT'].iloc[-1], None],
    'EWT_lag1': [route_EWT_A['EWT'].iloc[-1], None],
    'AWT_roll_mean': [route_EWT_A['AWT'].rolling(window=4).mean().iloc[-1], None],
    'EWT_roll_mean': [route_EWT_A['EWT'].rolling(window=4).mean().iloc[-1], None],
})

# Fill missing values with the last known values
next_hours.fillna(method='ffill', inplace=True)

# Predict AWT and EWT
pred_awt = model_awt.predict(next_hours[features])
pred_ewt = model_ewt.predict(next_hours[features])

print('Predicted AWT for the next two hours:', pred_awt)
print('Predicted EWT for the next two hours:', pred_ewt)


Predicted AWT for the next two hours: [6.30242753 5.95702809]
Predicted EWT for the next two hours: [ 0.30242753 -0.04297191]
