In [49]:
import pandas as pd
import numpy as np

In [50]:
np.random.seed(1)
n = 1000

dates = pd.date_range(start='2050-01-01', periods=n, freq='D')
products = np.random.choice(['Tablet', 'Camera', 'Phone', 'Microphone', 'Keyboard'], n)
prices = np.round(np.random.uniform(50.0, 200.0, n), 2)
quantities = np.random.randint(1, 15, n)

data = {
    'Date': dates,
    'Product': products,
    'Price': prices,
    'Quantities': quantities,
}

df = pd.DataFrame(data)
df


Unnamed: 0,Date,Product,Price,Quantities
0,2050-01-01,Microphone,90.30,1
1,2050-01-02,Keyboard,171.24,13
2,2050-01-03,Tablet,94.29,4
3,2050-01-04,Camera,131.62,7
4,2050-01-05,Microphone,123.19,5
...,...,...,...,...
995,2052-09-22,Tablet,187.70,9
996,2052-09-23,Tablet,175.51,11
997,2052-09-24,Keyboard,132.81,8
998,2052-09-25,Keyboard,180.14,3


In [51]:
df.dtypes

Date          datetime64[ns]
Product               object
Price                float64
Quantities             int32
dtype: object

In [52]:
df.to_csv

<bound method NDFrame.to_csv of           Date     Product   Price  Quantities
0   2050-01-01  Microphone   90.30           1
1   2050-01-02    Keyboard  171.24          13
2   2050-01-03      Tablet   94.29           4
3   2050-01-04      Camera  131.62           7
4   2050-01-05  Microphone  123.19           5
..         ...         ...     ...         ...
995 2052-09-22      Tablet  187.70           9
996 2052-09-23      Tablet  175.51          11
997 2052-09-24    Keyboard  132.81           8
998 2052-09-25    Keyboard  180.14           3
999 2052-09-26       Phone  177.48          10

[1000 rows x 4 columns]>

In [53]:
df.to_csv('random_sales.csv', index=True)

In [54]:
df = pd.read_csv('random_sales.csv', parse_dates=['Date'], date_format='%Y-%m-%d')
df.head()

Unnamed: 0.1,Unnamed: 0,Date,Product,Price,Quantities
0,0,2050-01-01,Microphone,90.3,1
1,1,2050-01-02,Keyboard,171.24,13
2,2,2050-01-03,Tablet,94.29,4
3,3,2050-01-04,Camera,131.62,7
4,4,2050-01-05,Microphone,123.19,5


In [55]:
df.dtypes

Unnamed: 0             int64
Date          datetime64[ns]
Product               object
Price                float64
Quantities             int64
dtype: object

In [56]:
df.to_excel('sales_data.xlsx')

In [57]:
df.to_json('sales_data.json')

In [58]:
df.shape

(1000, 5)

In [59]:
df.describe().round(2)

Unnamed: 0.1,Unnamed: 0,Date,Price,Quantities
count,1000.0,1000,1000.0,1000.0
mean,499.5,2051-05-15 11:59:59.999999488,126.46,7.49
min,0.0,2050-01-01 00:00:00,50.11,1.0
25%,249.75,2050-09-07 18:00:00,88.15,4.0
50%,499.5,2051-05-15 12:00:00,127.22,8.0
75%,749.25,2052-01-20 06:00:00,164.99,11.0
max,999.0,2052-09-26 00:00:00,199.78,14.0
std,288.82,,43.89,4.01


In [60]:
df.values[:5]

array([[0, Timestamp('2050-01-01 00:00:00'), 'Microphone', 90.3, 1],
       [1, Timestamp('2050-01-02 00:00:00'), 'Keyboard', 171.24, 13],
       [2, Timestamp('2050-01-03 00:00:00'), 'Tablet', 94.29, 4],
       [3, Timestamp('2050-01-04 00:00:00'), 'Camera', 131.62, 7],
       [4, Timestamp('2050-01-05 00:00:00'), 'Microphone', 123.19, 5]],
      dtype=object)

In [61]:
df.groupby('Product')['Quantities'].sum()

Product
Camera        1451
Keyboard      1501
Microphone    1444
Phone         1610
Tablet        1487
Name: Quantities, dtype: int64

In [62]:
df.groupby('Product')['Price'].mean().round(2)

Product
Camera        124.52
Keyboard      127.64
Microphone    124.61
Phone         127.39
Tablet        127.90
Name: Price, dtype: float64