In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
np.random.seed(1)
n = 1000

dates = pd.date_range(start='2050-01-01', periods=n, freq='D')
products = np.random.choice(['Tablet', 'Camera', 'Phone', 'Microphone', 'Keyboard'], n)
prices = np.round(np.random.uniform(50.0, 200.0, n), 2)
quantities = np.random.randint(1, 15, n)

data = {
    'Date': dates,
    'Product': products,
    'Price': prices,
    'Quantities': quantities,
}

df = pd.DataFrame(data)
df


Unnamed: 0,Date,Product,Price,Quantities
0,2050-01-01,Microphone,90.30,1
1,2050-01-02,Keyboard,171.24,13
2,2050-01-03,Tablet,94.29,4
3,2050-01-04,Camera,131.62,7
4,2050-01-05,Microphone,123.19,5
...,...,...,...,...
995,2052-09-22,Tablet,187.70,9
996,2052-09-23,Tablet,175.51,11
997,2052-09-24,Keyboard,132.81,8
998,2052-09-25,Keyboard,180.14,3


In [3]:
df.dtypes

Date          datetime64[ns]
Product               object
Price                float64
Quantities             int32
dtype: object

In [4]:
df.to_csv

<bound method NDFrame.to_csv of           Date     Product   Price  Quantities
0   2050-01-01  Microphone   90.30           1
1   2050-01-02    Keyboard  171.24          13
2   2050-01-03      Tablet   94.29           4
3   2050-01-04      Camera  131.62           7
4   2050-01-05  Microphone  123.19           5
..         ...         ...     ...         ...
995 2052-09-22      Tablet  187.70           9
996 2052-09-23      Tablet  175.51          11
997 2052-09-24    Keyboard  132.81           8
998 2052-09-25    Keyboard  180.14           3
999 2052-09-26       Phone  177.48          10

[1000 rows x 4 columns]>

In [5]:
df.to_csv('random_sales.csv', index=True)

In [6]:
df = pd.read_csv('random_sales.csv', parse_dates=['Date'], date_format='%Y-%m-%d')
df.head()

Unnamed: 0.1,Unnamed: 0,Date,Product,Price,Quantities
0,0,2050-01-01,Microphone,90.3,1
1,1,2050-01-02,Keyboard,171.24,13
2,2,2050-01-03,Tablet,94.29,4
3,3,2050-01-04,Camera,131.62,7
4,4,2050-01-05,Microphone,123.19,5


In [7]:
df.dtypes

Unnamed: 0             int64
Date          datetime64[ns]
Product               object
Price                float64
Quantities             int64
dtype: object

In [8]:
df.to_excel('sales_data.xlsx')

ModuleNotFoundError: No module named 'openpyxl'

In [9]:
df.to_json('sales_data.json')

In [None]:
df.shape

In [None]:
df.describe().round(2)

In [None]:
df.values[:5]

In [None]:
product_sales = df.groupby('Product')['Quantities'].sum()
product_sales

In [None]:
average_prices = df.groupby('Product')['Price'].mean().round(2)
average_prices

In [None]:
plt.bar(product_sales.index, product_sales.values, color=['red', 'green', 'blue'])
plt.title('Total Sales per product')
plt.xlabel('Product')
plt.ylabel('Total Sales')
plt.show()

In [None]:
plt.plot(average_prices.index, average_prices.values)
plt.title('Average Price per Product')
plt.xlabel('Product')
plt.ylabel('Average Price')
plt.show()