In [None]:
# Standard library
import os

# Data Wrangling
import pandas as pd
import numpy as np

# Visualisation
import matplotlib.pyplot as plt
import seaborn as sns

# Data Transformation
from sklearn.impute import SimpleImputer

In [None]:
# Location of the cleaned activities CSV (a local file path, not a URL).
# Set the ACTIVITIES_CSV environment variable to point at your own copy of the
# file; the fallback below is the original machine-specific path, kept so that
# existing runs behave exactly as before.
DEFAULT_ACTIVITIES_CSV = 'C:/Users/himan/OneDrive/Documents/RedbackOperations/redback-fit-sports-performance/Cycling Analysis/data/activities_cleaned.csv'
ACTIVITIES_CSV = os.environ.get('ACTIVITIES_CSV', DEFAULT_ACTIVITIES_CSV)

# Read the CSV file from disk
df = pd.read_csv(ACTIVITIES_CSV)

# Display the first 5 rows (DataFrame.head() defaults to n=5)
df.head()

In [None]:
# Keep only the cycling activities (rows whose 'Activity Type' is 'Ride')
is_ride = df['Activity Type'] == 'Ride'
df_ride = df.loc[is_ride]

df_ride.head()

In [None]:
# Summarise the ride subset: column names, dtypes and non-null counts
df_ride.info()

In [None]:
# Count the missing entries in every attribute of the ride subset
missing_values = df_ride.isna().sum()

# Show only the attributes that actually contain missing entries
missing_values.loc[missing_values.gt(0)]

In [None]:
# Remove attributes where 50% or more data is missing.
# dropna's `thresh` is the minimum number of NON-null values a column needs in
# order to be kept, so "strictly more than half present" is floor(n / 2) + 1.
# (Using n * 0.5 would keep a column that is exactly 50% missing, which the
# later `< 0.5` imputation filter then skips, leaving its NaNs in place.)
null_threshold = len(df_ride) // 2 + 1

# .copy() makes the result an independent frame, so later column assignments
# on df_ride_clean do not trigger pandas' SettingWithCopyWarning.
df_ride_clean = df_ride.dropna(thresh=null_threshold, axis=1).copy()

In [None]:
# Fraction of missing entries per column, computed once
null_fraction = df_ride_clean.isnull().mean()

# Columns that have some missing values, but fewer than 50% of their entries
partially_missing = (null_fraction > 0) & (null_fraction < 0.5)
missing_val_cols = df_ride_clean.columns[partially_missing].tolist()



In [None]:
# Fill the partially-missing columns with their column mean.
# NOTE(review): strategy='mean' needs numeric data - confirm that
# missing_val_cols contains no free-text columns.
imputer = SimpleImputer(strategy='mean')

# Guard: when no column needs imputation the selection has zero columns, and
# fit_transform rejects zero-feature input with a ValueError.
if missing_val_cols:
    # Work on an independent copy so the assignment below never writes into a
    # view of the frame this one was sliced from.
    df_ride_clean = df_ride_clean.copy()
    df_ride_clean[missing_val_cols] = imputer.fit_transform(df_ride_clean[missing_val_cols])

df_ride_clean.info()

In [None]:
# Parse the activity timestamp once and derive every calendar field from it
activity_ts = pd.to_datetime(df_ride_clean['Activity Date'], format='%d %b %Y, %H:%M:%S')

# Preparing data for time series analysis.
# assign() returns a new frame instead of writing columns one by one into a
# frame that was derived from a filtered slice (avoids SettingWithCopyWarning).
# 'Activity Date' is replaced in place; the other three columns are appended.
df_ride_clean = df_ride_clean.assign(**{
    'Activity Date': activity_ts,
    'Month': activity_ts.dt.month,
    'Weekday': activity_ts.dt.weekday,  # pandas convention: Monday=0 ... Sunday=6
    'Year': activity_ts.dt.year,
})

# Metrics summarised in both groupings below
metric_cols = ['Distance', 'Average Speed', 'Calories']

# Average metrics by month
avg_metrics_month = df_ride_clean.groupby('Month')[metric_cols].mean()

# Average metrics by weekday
avg_metrics_weekday = df_ride_clean.groupby('Weekday')[metric_cols].mean()

In [None]:
# Re-inspect the frame after the 'Activity Date' conversion and the added Month/Weekday/Year columns
df_ride_clean.info()

In [None]:
from sklearn.svm import SVR
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, accuracy_score
from sklearn.preprocessing import StandardScaler
import pandas as pd

# Define the rmse function
def rmse(predictions, targets):
    """Return the root mean squared error between two equal-length sequences.

    Parameters
    ----------
    predictions, targets : array-like of numbers
        Compared element by element, by position. Lists, NumPy arrays and
        pandas Series are all accepted. The metric is symmetric, so the
        argument order does not affect the result.

    Returns
    -------
    numpy.float64
        sqrt(mean((predictions - targets) ** 2)).
    """
    # Coerce to plain float arrays first: subtracting two pandas Series aligns
    # them on index labels rather than position, which silently distorts the
    # error when the indices differ (e.g. after a shuffled train/test split).
    predicted = np.asarray(predictions, dtype=float)
    actual = np.asarray(targets, dtype=float)
    return np.sqrt(np.mean((predicted - actual) ** 2))

# Predictors (X) and response (y) for the moving-time regression
feature_cols = ['Distance', 'Elevation Gain', 'Power Count']
X = df_ride_clean[feature_cols]
y = df_ride_clean['Moving Time']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Learn the scaling statistics from the training rows only, then apply the
# same transform to both splits
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Support vector regression with a Radial Basis Function (RBF) kernel
svm_model = SVR(kernel='rbf')
svm_model.fit(X_train_scaled, y_train)

# Predict the held-out rows
y_pred = svm_model.predict(X_test_scaled)

# Score the predictions against the held-out targets
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
RMSE_svr = rmse(y_test, y_pred)

# Report every metric to two decimal places
print("Performance Metrics of SVR :")
for label, value in (
    ("Mean Squared Error (MSE)", mse),
    ("Mean Absolute Error (MAE)", mae),
    ("R-squared Score (R2)", r2),
    ("Root Mean Squared Error (RMSE)", RMSE_svr),
):
    print(f"{label}: {value:.2f}")