# In [None]:  (Jupyter cell prompt left over from the notebook export)
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Load the sales export into a DataFrame.
df = pd.read_csv(r"/content/Amazon Sales data.csv")
# A bare expression is only echoed when it is the last statement of a
# notebook cell, so every inspection step below prints explicitly and
# produces output however this file is executed.
print(df)

# Number of rows and columns
print(df.shape)

# All column names
print(df.columns.values)

# Count of NA / missing values per column
print(df.isna().sum())

# Summary statistics
print(df.describe())

# Data type of every column
data_types = df.dtypes
print(data_types)

# Parse the order date so the .dt accessors (month, year) can be used below
df['Order Date'] = pd.to_datetime(df['Order Date'])




# Monthly sales: total revenue per calendar month, pooled across all years
monthly_sales = df.groupby(df['Order Date'].dt.month)['Total Revenue'].sum()
print("Monthly Sales Trend:")
print(monthly_sales)

# Visualization (revenue rescaled to millions to keep the axis readable)
monthly_sales_millions = monthly_sales / 1000000
plt.figure(figsize=(10, 6))
plt.plot(monthly_sales.index, monthly_sales_millions.values, marker='o', linestyle='-')
plt.xlabel('Month')
plt.ylabel('Total Revenue(in Million dollars)')
plt.title('Monthly Sales Trend')
plt.grid(True)
plt.xticks(range(1, 13))
# One tick per million up to the largest observed month. The upper bound is
# derived from the data instead of a fixed 1..18 range that only fits one
# particular dataset; with no data the default ticks are left untouched.
if not monthly_sales_millions.empty:
    top_tick = max(1, int(np.ceil(monthly_sales_millions.max())))
    plt.yticks(range(1, top_tick + 1))
plt.show()

# Yearly sales: total revenue per calendar year
yearly_sales = df.groupby(df['Order Date'].dt.year)['Total Revenue'].sum()
print("Yearly Sales Trend:")
print(yearly_sales)

# Visualization (revenue rescaled to millions)
yearly_sales_millions = yearly_sales / 1000000
plt.figure(figsize=(10, 6))
plt.plot(yearly_sales.index, yearly_sales_millions.values, marker='o', linestyle='-')
plt.xlabel('Year')
# The plotted values are divided by 1,000,000, so the label states the unit
# (same wording as the monthly chart) instead of implying raw dollars.
plt.ylabel('Total Revenue(in Million dollars)')
plt.title('Yearly Sales Trend')
plt.grid(True)
# Tick only on the years that occur in the data so no fractional years appear
plt.xticks(yearly_sales.index)
plt.show()


# Year-by-month sales: one row per year, one column per month
# (unstack pivots the inner "month" group level into columns)
yearly_monthly_sales = df.groupby([df['Order Date'].dt.year, df['Order Date'].dt.month])['Total Revenue'].sum().unstack()
print("Yearly Monthly Sales Trend:")
print(yearly_monthly_sales)

# Visualization: one line per year, months along the x-axis
fig, ax = plt.subplots(figsize=(10, 6))
for year in yearly_monthly_sales.index:
    yearly_monthly_sales.loc[year].plot(ax=ax, label=f'Year {year}', marker='o')
ax.set_xlabel('Month')
ax.set_ylabel('Total Revenue')
# Report the number of years actually present rather than a hard-coded 8
ax.set_title(f'Monthly and Yearly Sales Trend Over {len(yearly_monthly_sales.index)} Years')
# Show every month on the axis, matching the monthly trend chart
ax.set_xticks(range(1, 13))
ax.legend()
plt.show()

# Sales distribution by region: total revenue summed per region.
# sum() (not size()) is used so the 'Total Revenue' column really holds
# revenue; size() would only count the number of orders in each region.
region_trends = df.groupby('Region')['Total Revenue'].sum().reset_index()
region_trends

# Visualization: each region's share of overall revenue
plt.figure(figsize=(8, 8))
plt.pie(region_trends['Total Revenue'], labels=region_trends['Region'], autopct="%1.1f%%", startangle=140)
plt.title('Sales Distribution by Region')
plt.axis('equal')  # keep the pie circular
plt.tight_layout()
plt.show()


# Top 10 countries by total revenue
country_sales = df.groupby('Country')['Total Revenue'].sum().reset_index()
top_10_countries = country_sales.sort_values(by='Total Revenue', ascending=False).head(10)
top_10_countries

# Visualization: percentages are shares of the top-10 total, not of all sales
plt.figure(figsize=(8, 8))
plt.pie(top_10_countries['Total Revenue'], labels=top_10_countries['Country'], autopct="%1.1f%%", startangle=140)
# Title describes this chart (it previously repeated the region chart's title)
plt.title('Sales Distribution Across Top 10 Countries')
plt.axis('equal')  # keep the pie circular
plt.tight_layout()
plt.show()






# Units sold per item type
Item_sales = df.groupby('Item Type', as_index=False)['Units Sold'].sum()
Item_sales

# Visualization: bar chart of units sold for every item type
fig, ax = plt.subplots(figsize=(12, 6))
ax.bar(Item_sales['Item Type'], Item_sales['Units Sold'])
ax.set_xlabel('Item Type')
ax.set_ylabel('Units Sold')
ax.set_title('Units Sold by Item Type')
# Slant the category labels so long item names do not overlap
plt.xticks(rotation=45, ha='right')
fig.tight_layout()
plt.show()
