In [1]:
# 1 - Confirming pandas is installed: the import below fails if it is not,
#     and the printed version records which release this notebook ran against.

import pandas as pd
print(pd.__version__)

2.2.2


In [2]:
# 2 - Creating a DataFrame from a dictionary
#     Each key becomes a column name and each list supplies that column's values.

sales_data = dict(
    product=['Laptop', 'Smartphone', 'Tablet'],
    sales=[150, 200, 100],
    revenue=[90000, 80000, 30000],
)

df = pd.DataFrame(data=sales_data)
print(df)

      product  sales  revenue
0      Laptop    150    90000
1  Smartphone    200    80000
2      Tablet    100    30000


In [3]:
# 3 - Creating a Sample CSV file
import numpy as np

def create_sample_csv(filename='sample_data.csv'):
    """Build a small sample sales table, print it, and save it as a CSV file.

    The rows are handed straight to ``pd.DataFrame`` so every column keeps its
    own type. Routing the mixed string/integer rows through ``np.array`` first
    would coerce every value to a string, leaving ``Quantity`` and ``Price``
    as text columns in the in-memory frame.

    Args:
        filename: Path of the CSV file to write. Defaults to 'sample_data.csv'.

    Returns:
        None. The frame is printed to stdout and written to ``filename``
        without the index column.
    """
    header_row = ['Date', 'Product', 'Quantity', 'Price']
    data_rows = [['2024-01-01', 'Laptop', 10, 900],
                 ['2024-01-01', 'Smartphone', 20, 400],
                 ['2024-01-02', 'Tablet', 5, 600]]

    # Column names come from header_row, so the number of columns is not hard-coded.
    df = pd.DataFrame(data_rows, columns=header_row)
    print(df)
    df.to_csv(filename, index=False)

create_sample_csv()

         Date     Product Quantity Price
0  2024-01-01      Laptop       10   900
1  2024-01-01  Smartphone       20   400
2  2024-01-02      Tablet        5   600


In [4]:
# OR Creating a Sample CSV file

def create_sample_csv(filename='sample_cursor.csv'):
    """Print a three-row sample sales table and write it to ``filename`` as CSV.

    Args:
        filename: Destination path for the CSV file. Defaults to
            'sample_cursor.csv'.

    Returns:
        None. The table and a confirmation message are printed to stdout.
    """
    # Column-oriented layout: one list of values per column.
    sample_frame = pd.DataFrame({
        'Date': ['2024-01-01', '2024-01-01', '2024-01-02'],
        'Product': ['Laptop', 'Smartphone', 'Tablet'],
        'Quantity': [10, 20, 5],
        'Price': [900, 400, 600],
    })
    print(sample_frame)

    # Persist without the row index so the file holds only the four columns.
    sample_frame.to_csv(filename, index=False)
    print(f"\nCSV file '{filename}' created successfully!")

create_sample_csv()

         Date     Product  Quantity  Price
0  2024-01-01      Laptop        10    900
1  2024-01-01  Smartphone        20    400
2  2024-01-02      Tablet         5    600

CSV file 'sample_cursor.csv' created successfully!


In [5]:
# 4 - Read a CSV File into a DataFrame
#     NOTE: 'sales_data.csv' is not written by the earlier cells shown here
#     (they create 'sample_data.csv' and 'sample_cursor.csv'), so it presumably
#     has to exist in the working directory already - TODO confirm.

df = pd.read_csv('sales_data.csv')
print(df)

   product_name  sales_figure
0     Product A           117
1     Product B           650
2     Product C           861
3     Product D           792
4     Product E           272
5     Product F           303
6     Product G           222
7     Product H           280
8     Product I           166
9     Product J           514
10    Product K           401
11    Product L           196
12    Product M           740
13    Product N           531
14    Product O           531
15    Product P           330
16    Product Q           164
17    Product R           459
18    Product S           601
19    Product T           856
20    Product U           108
21    Product V           879
22    Product W           338
23    Product X           537
24    Product Y           104
25    Product Z           261


In [6]:
# 5a - Inspect the DataFrame loaded from sample_data.csv
#      head() shows the first rows, info() the column dtypes and non-null
#      counts, describe() the summary statistics of the numeric columns.

df = pd.read_csv('sample_data.csv')
print(df.head())
print()
# info() writes its report to stdout itself and returns None, so it is called
# directly; wrapping it in print() would append a stray "None" line.
df.info()
print()
print(df.describe())

         Date     Product  Quantity  Price
0  2024-01-01      Laptop        10    900
1  2024-01-01  Smartphone        20    400
2  2024-01-02      Tablet         5    600

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3 entries, 0 to 2
Data columns (total 4 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   Date      3 non-null      object
 1   Product   3 non-null      object
 2   Quantity  3 non-null      int64 
 3   Price     3 non-null      int64 
dtypes: int64(2), object(2)
memory usage: 228.0+ bytes
None

        Quantity       Price
count   3.000000    3.000000
mean   11.666667  633.333333
std     7.637626  251.661148
min     5.000000  400.000000
25%     7.500000  500.000000
50%    10.000000  600.000000
75%    15.000000  750.000000
max    20.000000  900.000000


In [7]:
# 5b - Inspect the DataFrame loaded from sales_data.csv
#      head() shows the first rows, info() the column dtypes and non-null
#      counts, describe() the summary statistics of the numeric columns.

df = pd.read_csv('sales_data.csv')
print(df.head())
print()
# info() writes its report to stdout itself and returns None, so it is called
# directly; wrapping it in print() would append a stray "None" line.
df.info()
print()
print(df.describe())

  product_name  sales_figure
0    Product A           117
1    Product B           650
2    Product C           861
3    Product D           792
4    Product E           272

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 26 entries, 0 to 25
Data columns (total 2 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   product_name  26 non-null     object
 1   sales_figure  26 non-null     int64 
dtypes: int64(1), object(1)
memory usage: 548.0+ bytes
None

       sales_figure
count     26.000000
mean     431.269231
std      250.694165
min      104.000000
25%      231.750000
50%      369.500000
75%      585.000000
max      879.000000


In [8]:
# 6 - Specifying Data Types for sample_cursor.csv

# Column -> dtype overrides applied while parsing; columns that are not listed
# (Date, Product) keep whatever dtype pandas infers.
dtype_dict = {
    'Quantity': 'int32',
    'Price': 'float32'
}

df = pd.read_csv('sample_cursor.csv', dtype=dtype_dict)
# info() writes its report to stdout itself and returns None, so it is called
# directly; wrapping it in print() would append a stray "None" line.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3 entries, 0 to 2
Data columns (total 4 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   Date      3 non-null      object 
 1   Product   3 non-null      object 
 2   Quantity  3 non-null      int32  
 3   Price     3 non-null      float32
dtypes: float32(1), int32(1), object(2)
memory usage: 204.0+ bytes
None


In [9]:
# 6b - Specifying Data Types for sales_data.csv
#      Parses sales_figure as float32 instead of the inferred integer dtype.

dtype_dict = {'sales_figure': 'float32'}

df = pd.read_csv('sales_data.csv', dtype=dtype_dict)
print(df)
print()
# NOTE(review): `df.info` is referenced without call parentheses, so this prints
# the bound method's repr ("<bound method DataFrame.info of ...") rather than
# the info report. The following cell repeats this with the call, so the
# omission looks like a deliberate contrast - confirm before changing.
print(df.info)

   product_name  sales_figure
0     Product A         117.0
1     Product B         650.0
2     Product C         861.0
3     Product D         792.0
4     Product E         272.0
5     Product F         303.0
6     Product G         222.0
7     Product H         280.0
8     Product I         166.0
9     Product J         514.0
10    Product K         401.0
11    Product L         196.0
12    Product M         740.0
13    Product N         531.0
14    Product O         531.0
15    Product P         330.0
16    Product Q         164.0
17    Product R         459.0
18    Product S         601.0
19    Product T         856.0
20    Product U         108.0
21    Product V         879.0
22    Product W         338.0
23    Product X         537.0
24    Product Y         104.0
25    Product Z         261.0

<bound method DataFrame.info of    product_name  sales_figure
0     Product A         117.0
1     Product B         650.0
2     Product C         861.0
3     Product D         792.0
4     P

In [10]:
# 6c - Specifying Data Types for sales_data.csv, this time actually calling info()
#      Parses sales_figure as float32 instead of the inferred integer dtype.

dtype_dict = {'sales_figure': 'float32'}

df = pd.read_csv('sales_data.csv', dtype=dtype_dict)
print(df)
print()
# With the call parentheses, info() runs and writes its report to stdout.
# It returns None, so it is not wrapped in print() (that would append "None").
df.info()
print("\nNotice the DIFFERENCE?")

   product_name  sales_figure
0     Product A         117.0
1     Product B         650.0
2     Product C         861.0
3     Product D         792.0
4     Product E         272.0
5     Product F         303.0
6     Product G         222.0
7     Product H         280.0
8     Product I         166.0
9     Product J         514.0
10    Product K         401.0
11    Product L         196.0
12    Product M         740.0
13    Product N         531.0
14    Product O         531.0
15    Product P         330.0
16    Product Q         164.0
17    Product R         459.0
18    Product S         601.0
19    Product T         856.0
20    Product U         108.0
21    Product V         879.0
22    Product W         338.0
23    Product X         537.0
24    Product Y         104.0
25    Product Z         261.0

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 26 entries, 0 to 25
Data columns (total 2 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 

In [12]:
# Summary statistics for whichever `df` an earlier cell loaded last. The recorded
# output matches the float32 sales_data frame: its mean (431.269226) differs
# slightly from the int64 result in step 5b (431.269231), presumably because of
# float32 precision - TODO confirm.
print(df.describe())

       sales_figure
count     26.000000
mean     431.269226
std      250.694168
min      104.000000
25%      231.750000
50%      369.500000
75%      585.000000
max      879.000000
