In [6]:
import pandas as pd
import numpy as np
import plotly.express as px
import matplotlib.pyplot as plt

In [2]:
df = pd.read_csv("Sales_data.csv") # Initialize DataFrame

df = df[["City", "Date", "gross income"]] # Remove unnecessary columns

df.dropna(subset=["Date"], inplace=True) # Delete rows with missing dates

df.drop_duplicates(subset=None, keep="first", inplace=True) # Check for and remove duplicate rows if present

df["Date"] = pd.to_datetime(df["Date"], format="%m/%d/%Y") # Ensure Date column is in DateTime format

df["Date"] =  df["Date"].sort_values(ascending=True).values # Sort dates in ascending order

def round_off(number):
    return round(number, 2)

df["gross income"] = df["gross income"].apply(round_off) # Round off gross income values to 2 decimals

# Breakdown Amount of Sales per City
city_groups = df.groupby("City").count()
print(city_groups["gross income"]) # Print the amount of sales per city

df.drop_duplicates(subset="Date", keep="first", inplace=True) # Remove duplicate dates

City
Mandalay     332
Naypyitaw    328
Yangon       340
Name: gross income, dtype: int64


In [3]:
df.head()

Unnamed: 0,City,Date,gross income
0,Yangon,2019-01-01,26.14
12,Yangon,2019-01-02,11.74
20,Naypyitaw,2019-01-03,21.51
28,Mandalay,2019-01-04,22.09
34,Naypyitaw,2019-01-05,19.88


In [13]:
season = df[df["Date"].dt.year == 2019] # One month to check for seasonlity

# Monthly Sales Trends
monthly_sales_trends_fig = px.line(df, x=season["Date"], y=df["gross income"][:len(season)], title="Monthly Seasonality Trend")
monthly_sales_trends_fig.update_layout(
    xaxis_title = "Date (Day)",
    yaxis_title = "Gross Income"
)

monthly_sales_trends_fig.show()