🔷 **What is Pandas?**

• **Pandas** is a powerful library in Python used for **data analysis and manipulation**.
• It provides two main data structures:
  ▫ **Series** – A one-dimensional array-like object
  ▫ **DataFrame** – A two-dimensional table-like structure with rows and columns
🔷 **Series in Pandas**

📌 **Definition of Series**
• A **Series** is a **one-dimensional** object in pandas that can hold **any type of data** (e.g., numbers, strings, etc.).
• It is **similar to a column** in a table.





In [None]:
import pandas as pd
#Creating a Series with a list
data = [1,2,3,4]
series = pd.Series(data)
print("Series /n",series) # Series /n 0    1
                                    # 1    2        
                                    # 2    3
                                    # 3    4

# Creating a Series with a dictionary
data = {'a': 1, 'b': 2, 'c': 3}
series = pd.Series(data)
print("Series /n",series) # Series /n a    1
                                    # b    2
                                    # c    3

# From a List with Custom Indexes:
data = [1, 2, 3, 4]
indexs = ['a', 'b', 'c', 'd']
series = pd.Series(data, index=indexs)
print("Series /n",series) # Series /n a    1
                                    # b    2
                                    # c    3
                                    # d    4


Series /n 0    1
1    2
2    3
3    4
dtype: int64
Series /n a    1
b    2
c    3
dtype: int64
Series /n a    1
b    2
c    3
d    4
dtype: int64


In [None]:
# 🔷 DataFrame in Pandas
# 📌 Definition of DataFrame
# • A DataFrame is a two-dimensional structure in pandas, similar to a table in Excel or a database.
# • It consists of rows and columns, allowing for organized data manipulation and analysis.

# Creating a DataFrame with a Dictionary:
data = {
    'Name': ['Alice', 'Bob', 'Charlie'],
    'Age': [25, 30, 35],
    'City': ['New York', 'Los Angeles', 'Chicago']
}
df = pd.DataFrame(data)
print("DataFrame /n",df) # DataFrame /n       Name  Age         City
                                    # 0    Alice   25     New York
                                    # 1      Bob   30  Los Angeles
                                    # 2  Charlie   35      Chicago
#•	Creating DataFrame from a List of Dictionaries
data = [
    {'Name': 'Alice', 'Age': 25, 'City': 'New York'},
    {'Name': 'Bob', 'Age': 30, 'City': 'Los Angeles'},
    {'Name': 'Charlie', 'Age': 35, 'City': 'Chicago'}
]
df = pd.DataFrame(data)
print(df)               # DataFrame /n       Name  Age         City   
                                    # 0    Alice   25     New York
                                    # 1      Bob   30  Los Angeles
                                    # 2  Charlie   35      Chicago



DataFrame /n       Name  Age         City
0    Alice   25     New York
1      Bob   30  Los Angeles
2  Charlie   35      Chicago
      Name  Age         City
0    Alice   25     New York
1      Bob   30  Los Angeles
2  Charlie   35      Chicago


In [20]:
# Reading Data from Files
# Reading a CSV File
# •	You can read a CSV file using pd.read_csv():
df = pd.read_csv('sales_data.csv')
print(df.head(5))  # Display the top 5 rows
print(df.tail(5))  # Display the last 5 rows




   Transaction ID        Date Product Category             Product Name  \
0           10001  2024-01-01      Electronics            iPhone 14 Pro   
1           10002  2024-01-02  Home Appliances         Dyson V11 Vacuum   
2           10003  2024-01-03         Clothing         Levi's 501 Jeans   
3           10004  2024-01-04            Books        The Da Vinci Code   
4           10005  2024-01-05  Beauty Products  Neutrogena Skincare Set   

   Units Sold  Unit Price  Total Revenue         Region Payment Method  
0           2      999.99        1999.98  North America    Credit Card  
1           1      499.99         499.99         Europe         PayPal  
2           3       69.99         209.97           Asia     Debit Card  
3           4       15.99          63.96  North America    Credit Card  
4           1       89.99          89.99         Europe         PayPal  
     Transaction ID        Date Product Category  \
235           10236  2024-08-23  Home Appliances   
236    

In [None]:
# Accessing Data in DataFrame
# Accessing Columns
# •	You can access a column in a DataFrame by its Game Number:
# Sample DataFrame
df


Unnamed: 0,Transaction ID,Date,Product Category,Product Name,Units Sold,Unit Price,Total Revenue,Region,Payment Method
0,10001,2024-01-01,Electronics,iPhone 14 Pro,2,999.99,1999.98,North America,Credit Card
1,10002,2024-01-02,Home Appliances,Dyson V11 Vacuum,1,499.99,499.99,Europe,PayPal
2,10003,2024-01-03,Clothing,Levi's 501 Jeans,3,69.99,209.97,Asia,Debit Card
3,10004,2024-01-04,Books,The Da Vinci Code,4,15.99,63.96,North America,Credit Card
4,10005,2024-01-05,Beauty Products,Neutrogena Skincare Set,1,89.99,89.99,Europe,PayPal
...,...,...,...,...,...,...,...,...,...
235,10236,2024-08-23,Home Appliances,Nespresso Vertuo Next Coffee and Espresso Maker,1,159.99,159.99,Europe,PayPal
236,10237,2024-08-24,Clothing,Nike Air Force 1 Sneakers,3,90.00,270.00,Asia,Debit Card
237,10238,2024-08-25,Books,The Handmaid's Tale by Margaret Atwood,3,10.99,32.97,North America,Credit Card
238,10239,2024-08-26,Beauty Products,Sunday Riley Luna Sleeping Night Oil,1,55.00,55.00,Europe,PayPal


In [None]:
import pandas as pd
# Sample DataFrame with 'Product Category' column
df = pd.DataFrame({
	'Product Category': ['Home Appliances', 'Electronics', 'Clothing'],
	'Price': [200, 150, 50],
	'Store': ['Store A', 'Store B', 'Store C']
})

# Access the 'Product Category' column
print(df['Product Category'])
print(df.loc[0]) # Access the first row                         # Output: 'Home Appliances'
print(df.iloc[0]) # Access the first row by index               # Output: 'Home Appliances'
print(df.iloc[0][0]) # Access the first row and first column    # Output: 'Home Appliances'

# Accessing specific elements
print(df.at[1,'Product Category']) # Access the element at row 1 and column 'Product Category'
# Output: 'Electronics'

# Accessing specific elements using iat 
# iat is used for integer-location based indexing    
print(df.iat[2, 2]) # Store C

# Adding a New Column
df['Salary'] = [50000, 60000, 70000]  
print(df) 

# Removing a Column
df.drop('Salary', axis=1, inplace=True)

# Updating a Column
df['Product Category'] = df['Product Category'].str.upper()  # Convert to uppercase
print(df)

# Dropping Rows
df.drop(0, axis=0, inplace=True)  # Drop the first row
print(df)


0    Home Appliances
1        Electronics
2           Clothing
Name: Product Category, dtype: object
Product Category    Home Appliances
Price                           200
Store                       Store A
Name: 0, dtype: object
Product Category    Home Appliances
Price                           200
Store                       Store A
Name: 0, dtype: object
Home Appliances
Electronics
Store C
  Product Category  Price    Store  Salary
0  Home Appliances    200  Store A   50000
1      Electronics    150  Store B   60000
2         Clothing     50  Store C   70000
  Product Category  Price    Store
0  HOME APPLIANCES    200  Store A
1      ELECTRONICS    150  Store B
2         CLOTHING     50  Store C
  Product Category  Price    Store
1      ELECTRONICS    150  Store B
2         CLOTHING     50  Store C


  print(df.iloc[0][0]) # Access the first row and first column


In [None]:
# 🗂 Sample DataFrame
 
import pandas as pd

# Create a sample DataFrame
data = {
    'Name': ['Alice', 'Bob', 'Charlie'],
    'Age': [25, 30, 35],
    'City': ['New York', 'Paris', 'London']
}

df = pd.DataFrame(data, index=['a', 'b', 'c'])
print(df)
#       Name  Age      City
# a    Alice   25  New York
# b      Bob   30     Paris
# c  Charlie   35    London

# 1️⃣ .iloc[] — Access by position/index (integer-based)
# Use .iloc to access data by row and column positions (0-based index).
print(df.iloc[0,0])  # First row, first column
# Output: Alice

# Accessing multipple value using iloc
print(df.iloc[0:2, 1])  # First two rows, second column
# Output:
# a    25
# b    30


# 2️⃣ .loc[] — Access by label (name-based)
# Use .loc to access data by row and column labels.
print(df.loc['a', 'Name'])  # Row with label 'a', column 'Name'
# Output: Alice

# Accessing multipple value using loc
print(df.loc['a':'b', 'Age'])  # Rows 'a' to 'b', column 'Age'
# a    25
# b    30



# 3️⃣ .at[] — Fast access by label, for a single value only
# .at is like .loc but faster and only works for a single element.
print(df.at['b', 'Age'])  # Row with label 'b', column 'Age'
# Output: 30


# 4️⃣ .iat[] — Fast access by position, for a single value only
# .iat is like .iloc but only for accessing a single element by integer position.
print(df.iat[2, 1])  # Third row, second column (0-based index)
# Output: 35


      Name  Age      City
a    Alice   25  New York
b      Bob   30     Paris
c  Charlie   35    London
Alice
a    25
b    30
Name: Age, dtype: int64
Alice
a    25
b    30
Name: Age, dtype: int64
30
35


In [14]:
# Accessing multipple value using loc
print(df.loc['a':'b', 'Age'])  # Rows 'a' to 'b', column 'Age'

a    25
b    30
Name: Age, dtype: int64
