# In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Load the Excel file
file_path = 'Dataset for projectt.xlsx'  # Replace with your file name
excel_data = pd.ExcelFile(file_path)

# Load data from the first sheet with pandas' default header handling.
# NOTE(review): nothing in this section reads `data`; it is kept in case
# other cells rely on it.
data = excel_data.parse('Sheet1')

# Re-parse the sheet while skipping its first three rows. The real column
# names then sit in the first data row, so promote that row to the header.
data_cleaned = excel_data.parse('Sheet1', skiprows=3)
data_cleaned.columns = data_cleaned.iloc[0]

# Drop the promoted header row. The explicit copy makes data_cleaned own its
# data, so the column assignments below write to an independent frame rather
# than to a slice of the parsed sheet (which pandas may flag with
# SettingWithCopyWarning, depending on version).
data_cleaned = data_cleaned.iloc[1:].copy()

# Convert numerical columns to numeric types. errors='coerce' turns any cell
# that cannot be parsed as a number into NaN instead of raising.
numeric_columns = ('Price', 'Diameter (cm)', 'Weight Capacity (kg)')
for column in numeric_columns:
    data_cleaned[column] = pd.to_numeric(data_cleaned[column], errors='coerce')

# Example Visualization 1: Price distribution
# Histogram of the non-missing prices, split into 10 bins.
fig, ax = plt.subplots(figsize=(8, 6))
prices = data_cleaned['Price'].dropna()
ax.hist(prices, bins=10, color='skyblue', edgecolor='black')
ax.set_title('Price Distribution of Gym Balls')
ax.set_xlabel('Price')
ax.set_ylabel('Frequency')
# Dashed horizontal grid lines only.
ax.grid(axis='y', linestyle='--', alpha=0.7)
plt.show()

# Example Visualization 2: Weight Capacity by Diameter
# The column names double as the axis labels, so name them once.
x_label = 'Diameter (cm)'
y_label = 'Weight Capacity (kg)'

plt.figure(figsize=(8, 6))
plt.scatter(
    data_cleaned[x_label],
    data_cleaned[y_label],
    c='orange',
    edgecolor='black',
)

# Configure the axes the scatter call just drew on.
axes = plt.gca()
axes.set_title('Weight Capacity vs. Diameter')
axes.set_xlabel(x_label)
axes.set_ylabel(y_label)
axes.grid(alpha=0.5)
plt.show()

# Example Visualization 3: Average Price by Material
# Mean price per material, sorted in ascending order of that mean.
price_by_material = data_cleaned.groupby('Material')['Price']
avg_price_by_material = price_by_material.mean().sort_values()

fig, ax = plt.subplots(figsize=(10, 6))
avg_price_by_material.plot.bar(ax=ax, color='lightgreen')
ax.set_title('Average Price by Material')
ax.set_xlabel('Material')
ax.set_ylabel('Average Price')
# Rotate the x tick labels by 45 degrees, right-aligned.
plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
ax.grid(axis='y', linestyle='--', alpha=0.7)
plt.show()