# In [None]:  (Jupyter cell prompt left over from the notebook export)
import pandas as pd
from tkinter import Tk
from tkinter.filedialog import askopenfilename
import os
import matplotlib.pyplot as plt

def select_file():
    """Ask the user to pick a CSV file through a native file dialog.

    A hidden Tk root window is created to host the dialog and is destroyed
    again once the dialog closes, so repeated calls do not accumulate
    invisible root windows.

    Returns:
        Whatever ``askopenfilename`` returns: the selected path, or an empty
        value if the user dismisses the dialog.
    """
    root = Tk()
    root.withdraw()  # Hide the empty root window; only the dialog is shown
    try:
        return askopenfilename(filetypes=[("CSV files", "*.csv")])
    finally:
        # Release the Tk root even if the dialog raises.
        root.destroy()

# Clone the repository (if not already cloned)
def clone_repo():
    """Make sure the bike-store sales repository exists in the working directory.

    If a path named after the repository already exists, nothing is cloned.
    Otherwise ``git clone`` is run with an explicit argument list (no shell),
    and a failure is reported immediately instead of being silently ignored.

    Returns:
        The repository directory name, relative to the current directory.

    Raises:
        RuntimeError: If ``git`` cannot be started or the clone exits with a
            non-zero status.
    """
    # Imported locally so this function does not depend on the file's
    # top-level import list.
    import subprocess

    repo_url = "https://github.com/FADIBAS/PDA-bike-store-sales.git"
    repo_name = "PDA-bike-store-sales"

    if os.path.exists(repo_name):
        print("Repository already cloned.")
        return repo_name

    print(f"Cloning repository from {repo_url}...")
    try:
        # Pass the target directory explicitly so it always matches repo_name.
        subprocess.run(["git", "clone", repo_url, repo_name], check=True)
    except (OSError, subprocess.CalledProcessError) as err:
        raise RuntimeError(
            f"Failed to clone repository from {repo_url}"
        ) from err
    return repo_name

# Load data from the repository
def load_data_from_repo(repo_name):
    """Read ``BikeStoreSales.csv`` from the given repository directory.

    Args:
        repo_name: Path of the directory expected to contain the CSV file.

    Returns:
        The CSV contents as a pandas DataFrame.

    Raises:
        FileNotFoundError: If the CSV file does not exist under ``repo_name``.
    """
    csv_path = os.path.join(repo_name, "BikeStoreSales.csv")

    # Guard clause: bail out early when the expected file is absent.
    if not os.path.exists(csv_path):
        raise FileNotFoundError(f"File not found in repository: {csv_path}")

    return pd.read_csv(csv_path)

# Clone the repository and load data
repo_name = clone_repo()
data = load_data_from_repo(repo_name)

# 1. Preview the data
print("Preview of the data:")
print(data.head())

print("\nSummary of the data:")
# DataFrame.info() writes its report to stdout itself and returns None, so it
# must not be wrapped in print() (that would emit a stray "None" line).
data.info()

print("\nStatistics of numeric columns:")
print(data.describe())

# 2. Clean the data (if necessary)
# Drop every row that has at least one missing value, but only when the
# frame actually contains any.
if data.isnull().values.any():
    print("\nMissing values detected. Cleaning data...")
    data = data.dropna()
    print("Data cleaned!")

# 3. Analyze the data
# Grand total of the 'Sales' column over all rows
total_sales = data['Sales'].sum()
print(f"\nTotal Sales: {total_sales}")

# 'Sales' summed within each 'Product' group
product_groups = data.groupby('Product')
sales_by_product = product_groups['Sales'].sum()
print("\nSales by Product:", sales_by_product, sep="\n")

# 'Sales' summed within each 'Region' group
region_groups = data.groupby('Region')
sales_by_region = region_groups['Sales'].sum()
print("\nSales by Region:", sales_by_region, sep="\n")

# 4. Visualize the data
# Each chart is drawn on its own freshly created axes. Series.plot() without
# an explicit `ax` reuses the current axes when a figure is still open, so on
# backends where plt.show() does not close the figure the pie chart would be
# painted on top of the bar chart.

# Plot sales by product
_fig_product, ax_product = plt.subplots()
sales_by_product.plot(kind='bar', title="Sales by Product", ax=ax_product)
ax_product.set_ylabel('Sales')
ax_product.set_xlabel('Product')
plt.show()

# Plot sales by region
_fig_region, ax_region = plt.subplots()
sales_by_region.plot(
    kind='pie', title="Sales by Region", autopct='%1.1f%%', ax=ax_region
)
ax_region.set_ylabel('')  # Suppress the default y-label pandas puts on pie charts
plt.show()

# Add additional analysis as needed
