In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Read the sample CSV into the working DataFrame used by every later cell
file_path = 'top_10_samp.csv'
data = pd.read_csv(filepath_or_buffer=file_path)

In [None]:

# Define the haversine function
def haversine(lat1, lon1, lat2, lon2, radius=3958.8):
    """Great-circle distance between two points given in decimal degrees.

    Operates element-wise, so each argument may be a scalar, a NumPy array,
    or a pandas Series (anything ``np.radians`` accepts).

    Parameters
    ----------
    lat1, lon1 : latitude and longitude of the first point(s), in degrees.
    lat2, lon2 : latitude and longitude of the second point(s), in degrees.
    radius : radius of the sphere; the result is expressed in the same unit.
        Defaults to 3958.8, which gives distances in miles.

    Returns
    -------
    The distance(s) along the surface of the sphere, in the unit of ``radius``.
    """
    lat1, lon1, lat2, lon2 = map(np.radians, [lat1, lon1, lat2, lon2])
    dlat = lat2 - lat1
    dlon = lon2 - lon1
    a = np.sin(dlat / 2)**2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon / 2)**2
    # Floating-point rounding can push `a` marginally outside [0, 1] for
    # (near-)antipodal points, which would turn sqrt(1 - a) into NaN.
    a = np.clip(a, 0.0, 1.0)
    return radius * 2 * np.arctan2(np.sqrt(a), np.sqrt(1 - a))



In [None]:
# Straight-line (great-circle) distance in miles between each (lat, lon)
# point and its (lot_lat, lot_lon) point.
# haversine() is built entirely from NumPy ufuncs, so the columns are passed
# in whole rather than through a row-wise `apply`: this avoids one Python
# call per row, and it also works when `data` has no rows (row-wise `apply`
# on an empty frame may hand back a DataFrame instead of a Series).
data['calculated_dist_miles'] = haversine(
    data['lat'], data['lon'], data['lot_lat'], data['lot_lon']
)

# Convert `tow_dist` from meters to miles for comparison
data['tow_dist_miles'] = data['tow_dist'] / 1609.34

# Absolute gap in miles, then the same gap as a percentage of the tow distance.
# NOTE: a row whose tow distance is 0 yields inf (or NaN) in the percentage column.
data['absolute_difference_miles'] = (data['calculated_dist_miles'] - data['tow_dist_miles']).abs()
data['percentage_difference_miles'] = (data['absolute_difference_miles'] / data['tow_dist_miles']) * 100

# Select relevant columns for comparison
comparison_df_miles = data[['tow_dist_miles', 'calculated_dist_miles', 'absolute_difference_miles', 'percentage_difference_miles']]

# Display the comparison results in miles
print("Comparison Results (Miles):")
print(comparison_df_miles)



In [None]:
# Scale the haversine distance (miles) by a reaction factor of 1.3
reaction_factor = 1.3
data['adjusted_dist_miles'] = data['calculated_dist_miles'].mul(reaction_factor)

# Error of the adjusted estimate against the tow distance: the absolute gap
# in miles, then that gap as a percentage of the tow distance
data['adjusted_absolute_difference_miles'] = (
    data['adjusted_dist_miles'].sub(data['tow_dist_miles']).abs()
)
data['adjusted_percentage_difference_miles'] = (
    data['adjusted_absolute_difference_miles'].div(data['tow_dist_miles']).mul(100)
)

# Keep only the columns needed to inspect the adjusted comparison
adjusted_comparison_df_miles = data.loc[:, [
    'tow_dist_miles',
    'calculated_dist_miles',
    'adjusted_dist_miles',
    'adjusted_absolute_difference_miles',
    'adjusted_percentage_difference_miles',
]]

# Show the heading followed by the adjusted comparison table
print("Adjusted Comparison Results (Miles):", adjusted_comparison_df_miles, sep="\n")


In [None]:
reaction_factor = 1.4

# Rescale the existing haversine distances (already in miles) by the factor
data['adjusted_dist_miles'] = data['calculated_dist_miles'].mul(reaction_factor)

# Tow distance converted from meters to miles
data['tow_dist_miles'] = data['tow_dist'].div(1609.34)

# Absolute error, in miles, of the raw and of the adjusted estimate
data['absolute_difference_miles'] = (
    data['calculated_dist_miles'].sub(data['tow_dist_miles']).abs()
)
data['adjusted_absolute_difference_miles'] = (
    data['adjusted_dist_miles'].sub(data['tow_dist_miles']).abs()
)

# Mean absolute error without and with the reaction factor (miles)
average_difference_without_reaction = data['absolute_difference_miles'].mean()
average_difference_with_reaction = data['adjusted_absolute_difference_miles'].mean()

# Two-row summary: one row per variant, one column with its mean error
average_differences_summary = pd.DataFrame({
    'Type': ['Without Reaction Factor', 'With Reaction Factor'],
    'Average Absolute Difference (miles)': [
        average_difference_without_reaction,
        average_difference_with_reaction,
    ],
})

# Show the heading followed by the summary table
print("Average Absolute Difference Comparison (in miles):", average_differences_summary, sep="\n")

# Also write the summary to a CSV file for review
average_differences_summary.to_csv(
    path_or_buf='average_differences_summary_miles.csv',
    index=False,
)