In [None]:

# Auto-install required libraries if not already installed
import subprocess
import sys

def install(package):
    """Install ``package`` with pip into the interpreter running this kernel.

    Args:
        package: Requirement string handed verbatim to ``pip install``.

    Raises:
        subprocess.CalledProcessError: If pip exits with a non-zero status.
    """
    import importlib

    # Invoke pip through sys.executable so the package lands in the same
    # environment as the running interpreter.
    subprocess.check_call([sys.executable, "-m", "pip", "install", package])
    # The import system caches directory listings; refresh them so the
    # freshly installed package can be imported without restarting.
    importlib.invalidate_caches()

# Third-party libraries used by the notebook. Each string is used both as
# the module name to import and as the name handed to pip.
required_packages = [
    "numpy",
    "pandas",
    "scipy",
    "statsmodels",
    "matplotlib",
    "seaborn",
]


def _is_importable(module_name):
    """Return True when ``module_name`` imports without an ImportError."""
    try:
        __import__(module_name)
    except ImportError:
        return False
    return True


# Install only what is missing.
for package in required_packages:
    if not _is_importable(package):
        install(package)


In [None]:
# Motorcycle Data Analysis Project

In [None]:
## 1. Data Creation and Cleaning

In [None]:
import numpy as np
import pandas as pd
import random

In [None]:
# Set seeds for reproducibility.
# Motorcycle types are drawn with the standard-library `random` module while
# the numeric features come from NumPy's global generator, so both have to be
# seeded for a re-run to reproduce the same data set.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)

In [None]:
# Motorcycle type -> (minimum price, maximum price) used when sampling prices.
motorcycle_types = dict([
    ('Standard', (41_900.0, 300_000.0)),
    ('Cruiser', (85_900.0, 4_000_000.0)),
    ('Sport', (156_000.0, 1_370_000.0)),
    ('Touring', (499_000.0, 2_440_000.0)),
    ('Off-Road', (88_800.0, 770_000.0)),
    ('Underbone', (29_900.0, 4_000_000.0)),
    ('Adventure', (69_000.0, 1_795_000.0)),
    ('Scooter', (66_900.0, 849_000.0)),
    ('Dual-Sport', (219_000.0, 795_000.0)),
    ('Cafe Racer', (175_000.0, 1_135_000.0)),
])

In [None]:
# Number of synthetic motorcycles to generate, and the (initially empty)
# list that will collect one row per generated motorcycle.
n_samples, data = 120, list()

In [None]:
# Candidate types never change between samples, so build the list once
# instead of on every iteration.
type_names = list(motorcycle_types.keys())

# Start from an empty list so re-running this cell yields exactly n_samples
# rows instead of appending another batch to the previous run's rows.
data = []
for _ in range(n_samples):
    # Keep this draw order: it determines which random numbers each feature
    # receives for a given seed.
    m_type = random.choice(type_names)
    price_min, price_max = motorcycle_types[m_type]
    price = np.random.uniform(price_min, price_max)
    engine_size = np.random.normal(loc=600, scale=200)
    # Horsepower is centred on one tenth of the sampled engine size.
    horsepower = np.random.normal(loc=engine_size / 10, scale=5)
    weight = np.random.normal(loc=180, scale=30)
    # The fuel-efficiency mean decreases as the engine size grows.
    fuel_efficiency = np.random.normal(loc=30 - engine_size / 100, scale=2)
    data.append([m_type, price, horsepower, engine_size, weight, fuel_efficiency])

In [None]:
# Column labels, in the same order as the values appended to each row.
column_names = ['Type', 'Price', 'Horsepower', 'Engine_Size', 'Weight', 'Fuel_Efficiency']
df = pd.DataFrame(data, columns=column_names)

In [None]:
# Clean data: drop rows with missing values, then keep only rows with a
# positive price and an engine size above 50.
df = df.dropna()
has_valid_price = df['Price'] > 0
has_valid_engine = df['Engine_Size'] > 50
df = df[has_valid_price & has_valid_engine]

In [None]:
# Show the first rows of the cleaned frame and how many rows survived.
print(
    "Cleaned data sample:",
    df.head(),
    f"Total clean records: {len(df)}",
    sep="\n",
)

In [None]:

## 2. NumPy Operations

In [None]:
# 1. Mean price across all cleaned records
prices = df['Price']
mean_price = np.mean(prices)

In [None]:
# 2. Standard deviation of engine size
# (np.std is called with its default ddof, i.e. the population formula)
engine_sizes = df['Engine_Size']
std_engine = np.std(engine_sizes)

In [None]:
# 3. Normalize horsepower: subtract the column mean, divide by the column std
hp = df['Horsepower']
hp_center = hp.mean()
hp_spread = hp.std()
normalized_hp = (hp - hp_center) / hp_spread

In [None]:
# 4. Filter motorcycles with above-average weight
mean_weight = np.mean(df['Weight'])
is_heavy = df['Weight'] > mean_weight
heavy_bikes = df[is_heavy]

In [None]:
# 5. Adjust prices for 5% inflation (every price scaled by 1.05)
inflated_prices = df['Price'].mul(1.05)

In [None]:
# Report the NumPy results computed in the preceding cells.
numpy_report = (
    "\nNumPy operations complete.",
    f"Mean Price: {mean_price:.2f}",
    f"Standard Deviation of Engine Size: {std_engine:.2f}",
    f"Inflated Price Sample: {inflated_prices.head()}",
)
print(*numpy_report, sep="\n")

In [None]:

## 3. SciPy Operation

In [None]:
from scipy.stats import pearsonr

# Pearson correlation between price and horsepower; the result unpacks into
# the correlation coefficient and its p-value.
price_hp_result = pearsonr(df['Price'], df['Horsepower'])
corr, p_value = price_hp_result

In [None]:
# Print the correlation coefficient (2 decimals) and p-value (4 decimals).
pearson_report = (
    "\nSciPy Pearson Correlation:",
    f"Correlation: {corr:.2f}, P-value: {p_value:.4f}",
)
print(*pearson_report, sep="\n")

In [None]:

## 4. Pandas Operation

In [None]:
# Mean horsepower within each motorcycle type.
avg_hp_by_type = df['Horsepower'].groupby(df['Type']).mean()
print("\nAverage Horsepower by Motorcycle Type:", avg_hp_by_type, sep="\n")

In [None]:

## 5. Statsmodels Regression

In [None]:
import statsmodels.api as sm

In [None]:
# Design matrix: horsepower plus an explicit constant column for the intercept.
X = sm.add_constant(df['Horsepower'])

# Ordinary least squares of price on horsepower.
ols_spec = sm.OLS(endog=df['Price'], exog=X)
model = ols_spec.fit()

print("\nStatsmodels Linear Regression Summary:", model.summary(), sep="\n")

In [None]:

## 6. Visualizations

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Boxplot of Price by Type, drawn on an explicitly created axes.
fig, ax = plt.subplots(figsize=(12, 6))
sns.boxplot(data=df, x='Type', y='Price', ax=ax)
# Tilt the type labels so they do not overlap.
plt.xticks(rotation=45)
ax.set_title("Boxplot of Price by Motorcycle Type")
fig.tight_layout()
plt.show()

In [None]:
# Histogram of Fuel Efficiency with a kernel density estimate overlaid.
fig, ax = plt.subplots(figsize=(8, 5))
sns.histplot(df['Fuel_Efficiency'], kde=True, ax=ax)
ax.set_title("Distribution of Fuel Efficiency")
plt.show()

In [None]:
# Regression Plot: Price vs Horsepower, with the fitted line drawn in red.
fig, ax = plt.subplots(figsize=(8, 5))
sns.regplot(data=df, x='Horsepower', y='Price', line_kws={"color":"red"}, ax=ax)
ax.set_title("Correlation between Horsepower and Price")
plt.show()