In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt

# CSV inputs for the three recordings that are compared below
file_path_working = "goal_ticks_working.csv"
file_path_not_working = "goal_ticks_not_working.csv"
file_path_new = "goal_ticks_new_kartID.csv"

# Column labels applied to the header-less CSV files, in file order.
# The files are expected to have exactly this many columns (19).
column_names = [
    "kart_ID",
    "ball_X", "ball_Z",
    "ball_aim_X", "ball_aim_Z",
    "previous_X", "previous_Z",
    "dist_to_ball",
    "kart_X", "kart_Z",
    "vel_X", "vel_Z",
    "speed", "steer", "accel", "brake", "skid",
    "time",
    "goal",
]

# Read the datasets with the expected number of columns
def read_dataset(file_path, column_names):
    """Load a header-less CSV file and label its columns.

    Args:
        file_path: Path of the CSV file. It is read with ``header=None``,
            so the first row is treated as data rather than column names.
        column_names: Labels to assign to the columns, one per column.

    Returns:
        The loaded DataFrame with ``column_names`` set as its columns.

    Raises:
        ValueError: If the number of columns in the file differs from
            ``len(column_names)``.
    """
    frame = pd.read_csv(file_path, header=None)

    # Refuse to label the data when the layout is not the expected one.
    if frame.shape[1] != len(column_names):
        raise ValueError(f"Dataset at {file_path} does not match the expected column structure.")

    frame.columns = column_names
    return frame

# Load the three recordings with the shared column layout (same order as before:
# working, not working, new).
df_working, df_not_working, df_new = (
    read_dataset(path, column_names)
    for path in (file_path_working, file_path_not_working, file_path_new)
)

# (label used in printed headings, frame) pairs, in reporting order
labelled_frames = (
    ("Working", df_working),
    ("Not working", df_not_working),
    ("New", df_new),
)

# Print the first few rows of each dataset
for heading, frame in labelled_frames:
    print(f"{heading} dataset:")
    print(frame.head())

# Compare feature statistics across the datasets
for heading, frame in labelled_frames:
    print(f"{heading} dataset statistics:")
    print(frame.describe())

# Check normalization parameters: each scaler is fitted on every column except
# the last one of its own dataset.
features_working = df_working.iloc[:, :-1]
scaler_working = StandardScaler()
X_scaled_working = scaler_working.fit_transform(features_working)

features_not_working = df_not_working.iloc[:, :-1]
scaler_not_working = StandardScaler()
X_scaled_not_working = scaler_not_working.fit_transform(features_not_working)

# The new dataset is transformed with the parameters learned from the working
# dataset; no scaler is fitted on it.
X_scaled_new = scaler_working.transform(df_new.iloc[:, :-1])

print("Scaler parameters for the working dataset:")
print("Mean:", scaler_working.mean_)
print("Scale:", scaler_working.scale_)

# Plot one figure per feature, with one histogram panel per dataset
hist_panels = (
    (df_working, "Working Dataset"),
    (df_not_working, "Not Working Dataset"),
    (df_new, "New Dataset"),
)
feature_count = df_working.shape[1] - 1  # the last column is excluded as the target

for col in range(feature_count):
    plt.figure(figsize=(15, 4))

    for position, (frame, caption) in enumerate(hist_panels, start=1):
        plt.subplot(1, 3, position)
        plt.hist(frame.iloc[:, col], bins=50, alpha=0.5, label=caption)
        plt.title(f"Feature {col} - {caption}")

    plt.show()


NameError: name 'file_path_not_working' is not defined