<a href="https://colab.research.google.com/github/Naeima/PoachNet/blob/main/Linear_Regression%2C_Polynomial_and_VAR.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [15]:
# Standard library imports
import sys
import datetime as datetime
# Third-party library imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.pylab import rcParams
import matplotlib.pylab
from matplotlib import pyplot
import seaborn as sns
import statsmodels.api as sm
from statsmodels.graphics.tsaplots import plot_acf
from statsmodels.tsa.seasonal import seasonal_decompose
# Pandas-specific import
from pandas import Series

In [26]:
# Import necessary libraries
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import mean_squared_error
from statsmodels.tsa.api import VAR
from google.colab import files

# Ask the user to pick one or more files through the Colab upload widget.
# `uploaded` maps each saved filename to the raw bytes of that file.
uploaded = files.upload()

# Report every upload and parse it as CSV. `data` is overwritten on each
# iteration, so after the loop it holds the frame of the last uploaded file.
for filename in uploaded:
    print(
        'User uploaded file "{name}" with length {length} bytes'.format(
            name=filename,
            length=len(uploaded[filename]),
        )
    )
    data = pd.read_csv(filename)

# Load and preprocess the dataset
def preprocess_dataset(file_path):
    """Read a CSV of positions and attach calendar fields to each row.

    Parameters
    ----------
    file_path : str or path-like
        CSV file containing ``LocalDate``, ``LocalTime``, ``lat`` and
        ``long`` columns.

    Returns
    -------
    pandas.DataFrame
        Columns ``datetime``, ``lat``, ``long``, ``day``, ``month`` and
        ``year`` (in that order), one row per input row.
    """
    raw = pd.read_csv(file_path)

    # Join the date and time strings with a space and parse the result.
    raw['datetime'] = pd.to_datetime(raw['LocalDate'] + ' ' + raw['LocalTime'])

    # Keep only the timestamp and the coordinates.
    positions = raw[['datetime', 'lat', 'long']]

    # Derive the calendar parts from the parsed timestamp.
    stamp = positions['datetime'].dt
    return positions.assign(day=stamp.day, month=stamp.month, year=stamp.year)

# Linear Regression Implementation
def run_linear_regression(dropped_df):
    """Cross-validate a linear fit of latitude and longitude against ``day``.

    The ``day`` column is the single predictor. For each of the five
    ``TimeSeriesSplit`` folds, one regressor is fitted to latitude and then
    refitted to longitude, and the RMSE on the fold's test rows is recorded.

    Parameters
    ----------
    dropped_df : pandas.DataFrame
        Frame with ``day``, ``lat`` and ``long`` columns.

    Returns
    -------
    tuple
        ``(mean latitude RMSE, mean longitude RMSE)`` over the folds.
    """
    # sklearn expects a 2-D feature matrix, hence the reshape to one column.
    X = dropped_df['day'].values.reshape(-1, 1)
    targets = (dropped_df['lat'].values, dropped_df['long'].values)

    model = LinearRegression()
    splitter = TimeSeriesSplit(n_splits=5)

    # One list of per-fold RMSE values per target: latitude, then longitude.
    lat_errors, lon_errors = [], []

    for train_idx, test_idx in splitter.split(X):
        X_train, X_test = X[train_idx], X[test_idx]

        # The same estimator is refitted for each target in turn.
        for y, errors in zip(targets, (lat_errors, lon_errors)):
            model.fit(X_train, y[train_idx])
            predicted = model.predict(X_test)
            errors.append(np.sqrt(mean_squared_error(y[test_idx], predicted)))

    return np.mean(lat_errors), np.mean(lon_errors)

# Polynomial Regression Implementation
def run_polynomial_regression(dropped_df):
    """Cross-validate a degree-4 polynomial fit of lat/long against ``day``.

    The ``day`` column is expanded into its powers 1 through 4 (no bias
    column). A fresh ``LinearRegression`` is created for each of the five
    ``TimeSeriesSplit`` folds, fitted to latitude and then refitted to
    longitude, and the RMSE on the fold's test rows is recorded.

    Parameters
    ----------
    dropped_df : pandas.DataFrame
        Frame with ``day``, ``lat`` and ``long`` columns.

    Returns
    -------
    tuple
        ``(mean latitude RMSE, mean longitude RMSE)`` over the folds.
    """
    day_column = dropped_df['day'].values.reshape(-1, 1)
    lat_values = dropped_df['lat'].values
    lon_values = dropped_df['long'].values

    # Expand the single predictor into polynomial terms once, before splitting.
    expander = PolynomialFeatures(degree=4, include_bias=False)
    design = expander.fit_transform(day_column)

    fold_scores = []
    for train_idx, test_idx in TimeSeriesSplit(n_splits=5).split(design):
        X_train = design[train_idx]
        X_test = design[test_idx]

        # New estimator per fold; it is reused for both targets in this fold.
        model = LinearRegression()

        model.fit(X_train, lat_values[train_idx])
        lat_error = np.sqrt(
            mean_squared_error(lat_values[test_idx], model.predict(X_test))
        )

        model.fit(X_train, lon_values[train_idx])
        lon_error = np.sqrt(
            mean_squared_error(lon_values[test_idx], model.predict(X_test))
        )

        fold_scores.append((lat_error, lon_error))

    # Average each target's RMSE over all folds.
    mean_lat = np.mean([lat_error for lat_error, _ in fold_scores])
    mean_lon = np.mean([lon_error for _, lon_error in fold_scores])
    return mean_lat, mean_lon

# VAR Implementation
def run_var_model(dropped_df, obs=1435, lags=2):
    """Fit a VAR model on latitude/longitude and score a hold-out forecast.

    The last ``obs`` rows are held out. A VAR with ``lags`` lags is fitted on
    the rows before them and used to forecast ``obs`` steps ahead; the
    forecast is compared with the held-out values.

    Parameters
    ----------
    dropped_df : pandas.DataFrame
        Frame with ``lat`` and ``long`` columns, in the order the series
        should be modelled.
    obs : int, optional
        Number of trailing rows to hold out and forecast. Defaults to 1435,
        the value previously hard-coded here.
    lags : int, optional
        Lag order passed to ``VAR.fit``. Defaults to 2, the value previously
        hard-coded here.

    Returns
    -------
    tuple
        ``(latitude RMSE, longitude RMSE)`` of the forecast on the hold-out.

    Raises
    ------
    ValueError
        If ``obs`` or ``lags`` is smaller than 1, or if fewer than
        ``lags + 1`` rows would remain for training.
    """
    if obs < 1 or lags < 1:
        raise ValueError(
            f"obs and lags must both be at least 1 (got obs={obs}, lags={lags})"
        )

    # Fail early with a clear message instead of letting an empty or tiny
    # training slice fail inside the model. This is a necessary condition
    # only; the model may still reject a training set that is too short.
    n_rows = len(dropped_df)
    if n_rows - obs <= lags:
        raise ValueError(
            f"need more than {obs + lags} rows to hold out {obs} observations "
            f"and fit {lags} lags, but the frame has only {n_rows}"
        )

    coords = dropped_df[['lat', 'long']]
    train, test = coords[:-obs], coords[-obs:]

    fitted = VAR(train).fit(lags)

    # Seed the forecast with the last k_ar training rows, k_ar being the lag
    # order of the fitted model.
    seed = train.values[-fitted.k_ar:]
    forecast = fitted.forecast(y=seed, steps=obs)

    # Forecast columns follow the training column order: lat, then long.
    lat_rmse = np.sqrt(mean_squared_error(test['lat'].values, forecast[:, 0]))
    lon_rmse = np.sqrt(mean_squared_error(test['long'].values, forecast[:, 1]))

    return lat_rmse, lon_rmse

# Main function to run the models
def main(file_path):
    """Preprocess a CSV, evaluate all three models and print their RMSEs.

    Parameters
    ----------
    file_path : str or path-like
        CSV file handed to ``preprocess_dataset``.

    Returns
    -------
    None
        The results are printed, one line per model.
    """
    frame = preprocess_dataset(file_path)

    # Heading printed for each model, paired with the function that scores it.
    report = (
        ("Linear Regression Results (Latitude RMSE, Longitude RMSE):",
         run_linear_regression),
        ("Polynomial Regression Results (Latitude RMSE, Longitude RMSE):",
         run_polynomial_regression),
        ("VAR Model Results (Latitude RMSE, Longitude RMSE):",
         run_var_model),
    )

    # Evaluate every model first so nothing is printed if one of them fails.
    scores = [evaluate(frame) for _, evaluate in report]

    for (heading, _), result in zip(report, scores):
        print(heading, result)

# Run the pipeline on the file that was just uploaded. Colab saves an upload
# under a new name when the original name is already taken (for example
# "Seri (6).csv"), so a hard-coded 'Seri.csv' would analyse an older copy.
# When several files are uploaded the last one is used; 'Seri.csv' is the
# fallback when nothing was uploaded.
csv_path = list(uploaded)[-1] if uploaded else 'Seri.csv'
main(csv_path)


Saving Seri.csv to Seri (6).csv
User uploaded file "Seri (6).csv" with length 884978 bytes
Linear Regression Results (Latitude RMSE, Longitude RMSE): (0.10909395589963107, 0.1211952434721735)
Polynomial Regression Results (Latitude RMSE, Longitude RMSE): (0.10940488707653225, 0.12220658320410378)
VAR Model Results (Latitude RMSE, Longitude RMSE): (0.22087035805341285, 0.08953885489390001)
