In [34]:
## Importing Pandas
import pandas as pd

In [35]:
## Series
# A Series is a one-dimensional labeled array that can hold values of any data type.
# It can be built from a list, a dictionary, or a NumPy array.
# Here it is built from a plain list, so pandas supplies the default integer labels.
data = list(range(1, 6))
series_from_list = pd.Series(data=data)
print(series_from_list)
print('Type of series_from_list:', type(series_from_list))

0    1
1    2
2    3
3    4
4    5
dtype: int64
Type of series_from_list: <class 'pandas.core.series.Series'>


In [36]:
## Create a Series from a dictionary (keys become the index labels, values the data):
data_dict = dict(a=1, b=2, c=3)
series_from_dict = pd.Series(data=data_dict)
print(series_from_dict)
print('Type of series_from_dict:', type(series_from_dict))

a    1
b    2
c    3
dtype: int64
Type of series_from_dict: <class 'pandas.core.series.Series'>


In [37]:
# Build a Series with explicit index labels instead of the default 0..n-1 integers.
index = list('abc')
data = [10, 20, 30]
pd.Series(data=data, index=index)

a    10
b    20
c    30
dtype: int64

In [38]:
## DataFrame
# A DataFrame is a two-dimensional labeled data structure whose columns may have different types.
# Conceptually it behaves like a table or a spreadsheet.

## Build a DataFrame from a dictionary (each key becomes a column name, each list a column):
data_dict = dict(
    Name=['Alice', 'Bob', 'Charlie'],
    Age=[25, 30, 35],
    City=['New York', 'Los Angeles', 'Chicago'],
)
data_frame_from_dict = pd.DataFrame(data=data_dict)
print(data_frame_from_dict)
print('Type of data_frame_from_dict:', type(data_frame_from_dict))

      Name  Age         City
0    Alice   25     New York
1      Bob   30  Los Angeles
2  Charlie   35      Chicago
Type of data_frame_from_dict: <class 'pandas.core.frame.DataFrame'>


In [39]:
## Build a DataFrame from a list of dictionaries (one dictionary per row):
data_list_of_dicts = [
    {'Name': name, 'Age': age, 'City': city}
    for name, age, city in (
        ('Alice', 25, 'New York'),
        ('Bob', 30, 'Los Angeles'),
        ('Charlie', 35, 'Chicago'),
    )
]
data_frame_from_list_of_dicts = pd.DataFrame(data=data_list_of_dicts)
print(data_frame_from_list_of_dicts)
print('Type of data_frame_from_list_of_dicts:', type(data_frame_from_list_of_dicts))

      Name  Age         City
0    Alice   25     New York
1      Bob   30  Los Angeles
2  Charlie   35      Chicago
Type of data_frame_from_list_of_dicts: <class 'pandas.core.frame.DataFrame'>


In [40]:
# Load the online sales data from a CSV file (path is relative to the working directory).
df = pd.read_csv(filepath_or_buffer='Online_Sales_Data.csv')
# Preview the first rows of the DataFrame (5 is the default that head() uses).
df.head(n=5)

Unnamed: 0,Transaction ID,Date,Product Category,Product Name,Units Sold,Unit Price,Total Revenue,Region,Payment Method
0,10001,2024-01-01,Electronics,iPhone 14 Pro,2,999.99,1999.98,North America,Credit Card
1,10002,2024-01-02,Home Appliances,Dyson V11 Vacuum,1,499.99,499.99,Europe,PayPal
2,10003,2024-01-03,Clothing,Levi's 501 Jeans,3,69.99,209.97,Asia,Debit Card
3,10004,2024-01-04,Books,The Da Vinci Code,4,15.99,63.96,North America,Credit Card
4,10005,2024-01-05,Beauty Products,Neutrogena Skincare Set,1,89.99,89.99,Europe,PayPal


In [41]:
# Preview the last rows of the DataFrame (5 is the default that tail() uses).
df.tail(n=5)

Unnamed: 0,Transaction ID,Date,Product Category,Product Name,Units Sold,Unit Price,Total Revenue,Region,Payment Method
235,10236,2024-08-23,Home Appliances,Nespresso Vertuo Next Coffee and Espresso Maker,1,159.99,159.99,Europe,PayPal
236,10237,2024-08-24,Clothing,Nike Air Force 1 Sneakers,3,90.0,270.0,Asia,Debit Card
237,10238,2024-08-25,Books,The Handmaid's Tale by Margaret Atwood,3,10.99,32.97,North America,Credit Card
238,10239,2024-08-26,Beauty Products,Sunday Riley Luna Sleeping Night Oil,1,55.0,55.0,Europe,PayPal
239,10240,2024-08-27,Sports,Yeti Rambler 20 oz Tumbler,2,29.99,59.98,Asia,Credit Card


In [42]:
# Work on an independent copy. Plain assignment would only create a second name for the
# same DataFrame object, so in-place changes made through df1 (adding or dropping columns,
# overwriting values) would also alter data_frame_from_dict.
df1 = data_frame_from_dict.copy()

In [43]:
# Display df1 (a bare expression on the last line of a cell is rendered as the cell output).
df1

Unnamed: 0,Name,Age,City
0,Alice,25,New York
1,Bob,30,Los Angeles
2,Charlie,35,Chicago


In [44]:
# Select a single column by its label; the result is a pandas Series that keeps the row index.
df1['Name']

0      Alice
1        Bob
2    Charlie
Name: Name, dtype: object

In [45]:
# Check what kind of object single-column selection returns.
type(df1['Name'])

pandas.core.series.Series

In [46]:
# Label-based indexing: fetch the row whose index label is 0.
# The 0 is a label, not a position; it is the first row here only because df1 uses the
# default integer index.
df1.loc[0]

Name       Alice
Age           25
City    New York
Name: 0, dtype: object

In [47]:
# Position-based indexing: fetch the row at integer position 0 (the first row),
# regardless of what its index label is.
df1.iloc[0]

Name       Alice
Age           25
City    New York
Name: 0, dtype: object

In [48]:
# Read one value by (row position, column position): row 0, column 1 (the 'Age' column).
# A single iloc call replaces the chained form `df1.iloc[0][1]`, which first built a row
# Series and then used an integer key on its string labels -- a positional fallback that
# recent pandas versions deprecate.
df1.iloc[0, 1]

25

In [49]:
# Show df1 again as a reference for the element-access examples.
df1

Unnamed: 0,Name,Age,City
0,Alice,25,New York
1,Bob,30,Los Angeles
2,Charlie,35,Chicago


In [50]:
## Accessing Specific Rows and Columns
# `loc` and `iloc` select rows and columns; for a single scalar value pandas also provides
# the `at` (label-based) and `iat` (position-based) accessors.

# Read 'Age' from the row labelled 1 (the second row) using label-based scalar access.
df1.at[1,'Age']

30

In [51]:
# Read 'City' from the row labelled 2 (the third row) using label-based scalar access.
df1.at[2, 'City']

'Chicago'

In [52]:
## Accessing a single element by integer position using `iat`:
# Row position 1 (the second row), column position 2 (the third column, 'City').
df1.iat[1, 2]

'Los Angeles'

In [53]:
## Data Manipulation with DataFrame
# DataFrames support operations such as filtering, sorting, and aggregating.
# Filtering example: build a boolean mask from a condition and keep the rows where it is True.
filtered_df = df1[df1['Age'].gt(30)]  # keep rows whose 'Age' is strictly greater than 30
print(filtered_df)

      Name  Age     City
2  Charlie   35  Chicago


In [54]:
## Adding a new column to the DataFrame:
# Assigning a list to a new column label creates the column; the list supplies one value per row.
salary_values = [50000, 60000, 70000]
df1['Salary'] = salary_values
df1

Unnamed: 0,Name,Age,City,Salary
0,Alice,25,New York,50000
1,Bob,30,Los Angeles,60000
2,Charlie,35,Chicago,70000


In [55]:
## Removing a column from the DataFrame:
# drop() returns a new DataFrame without the given column, and df1 is rebound to that result.
# This replaces the `inplace=True` form: nothing is mutated in place, so any other variable
# that refers to the previous DataFrame object is left untouched.
df1 = df1.drop(columns='Salary')  # Remove the 'Salary' column
df1

Unnamed: 0,Name,Age,City
0,Alice,25,New York
1,Bob,30,Los Angeles
2,Charlie,35,Chicago


In [56]:
## Increase every value in the existing 'Age' column by 10:
# The addition is applied element-wise to the whole column and written back to 'Age'.
# NOTE: this cell is not idempotent -- it reads and overwrites the same column, so every
# re-run adds another 10.
df1['Age'] = df1['Age']+10
df1

Unnamed: 0,Name,Age,City
0,Alice,35,New York
1,Bob,40,Los Angeles
2,Charlie,45,Chicago


In [57]:
# Display the sales DataFrame; for a long frame the rendered output is truncated and the
# omitted rows are shown as "...".
df

Unnamed: 0,Transaction ID,Date,Product Category,Product Name,Units Sold,Unit Price,Total Revenue,Region,Payment Method
0,10001,2024-01-01,Electronics,iPhone 14 Pro,2,999.99,1999.98,North America,Credit Card
1,10002,2024-01-02,Home Appliances,Dyson V11 Vacuum,1,499.99,499.99,Europe,PayPal
2,10003,2024-01-03,Clothing,Levi's 501 Jeans,3,69.99,209.97,Asia,Debit Card
3,10004,2024-01-04,Books,The Da Vinci Code,4,15.99,63.96,North America,Credit Card
4,10005,2024-01-05,Beauty Products,Neutrogena Skincare Set,1,89.99,89.99,Europe,PayPal
...,...,...,...,...,...,...,...,...,...
235,10236,2024-08-23,Home Appliances,Nespresso Vertuo Next Coffee and Espresso Maker,1,159.99,159.99,Europe,PayPal
236,10237,2024-08-24,Clothing,Nike Air Force 1 Sneakers,3,90.00,270.00,Asia,Debit Card
237,10238,2024-08-25,Books,The Handmaid's Tale by Margaret Atwood,3,10.99,32.97,North America,Credit Card
238,10239,2024-08-26,Beauty Products,Sunday Riley Luna Sleeping Night Oil,1,55.00,55.00,Europe,PayPal


In [61]:
## Display the data type of each column:
# The table is embedded in an f-string instead of being passed as a second print() argument:
# print() puts a space between its arguments, which landed right after the newline and
# shifted the first row of the table out of alignment.
print(f'Data type of each column:\n{df.dtypes}')

## Describe the DataFrame (summary statistics; the output covers the numeric columns):
print(f'Descriptive statistics of the DataFrame:\n{df.describe()}')

Data type of each column:
 Transaction ID        int64
Date                 object
Product Category     object
Product Name         object
Units Sold            int64
Unit Price          float64
Total Revenue       float64
Region               object
Payment Method       object
dtype: object
Descriptive statistics of the DataFrame:
        Transaction ID  Units Sold   Unit Price  Total Revenue
count       240.00000  240.000000   240.000000     240.000000
mean      10120.50000    2.158333   236.395583     335.699375
std          69.42622    1.322454   429.446695     485.804469
min       10001.00000    1.000000     6.500000       6.500000
25%       10060.75000    1.000000    29.500000      62.965000
50%       10120.50000    2.000000    89.990000     179.970000
75%       10180.25000    3.000000   249.990000     399.225000
max       10240.00000   10.000000  3899.990000    3899.990000
