# **Core Data Structures in Pandas**

## ***Pandas is built on two main data structures:***

Series → One-dimensional (like a single column in Excel)

DataFrame → Two-dimensional (like a full spreadsheet or SQL table)

In [None]:
# Series — 1D Labeled Array

import pandas as pd

s = pd.Series([10, 20, 30, 40])
print(s)

0    10
1    20
2    30
3    40
dtype: int64


In [None]:
# You can also define a custom index:

s = pd.Series([10, 20, 30], index=["a", "b", "c"])
print(s)

a    10
b    20
c    30
dtype: int64


In [None]:
# DataFrame — 2D Labeled Table

data = {
          "name": ["Alice", "Bob", "Charlie"],
          "age": [25, 30, 35],
          "city": ["Delhi", "Mumbai", "Bangalore"]
      }

df = pd.DataFrame(data)
print(df)

# each column in a dataframe is a series

      name  age       city
0    Alice   25      Delhi
1      Bob   30     Mumbai
2  Charlie   35  Bangalore


In [None]:
# Index and Labels


data = {
          "name": ["Alice", "Bob", "Charlie"],
          "age": [25, 30, 35],
          "city": ["Delhi", "Mumbai", "Bangalore"]
      }

df = pd.DataFrame(data)
print(df)
df.index     #Row labels

      name  age       city
0    Alice   25      Delhi
1      Bob   30     Mumbai
2  Charlie   35  Bangalore


RangeIndex(start=0, stop=3, step=1)

In [None]:
data = {
          "name": ["Alice", "Bob", "Charlie"],
          "age": [25, 30, 35],
          "city": ["Delhi", "Mumbai", "Bangalore"]
      }

df = pd.DataFrame(data)
print(df)
df.columns     #Columns labels

      name  age       city
0    Alice   25      Delhi
1      Bob   30     Mumbai
2  Charlie   35  Bangalore


Index(['name', 'age', 'city'], dtype='object')

In [None]:
df.index = ["a", "b", "c"]
df.columns = ["Name", "Age", "City"]
print(df)

      Name  Age       City
a    Alice   25      Delhi
b      Bob   30     Mumbai
c  Charlie   35  Bangalore


## **Creating DataFrames**

In [None]:
# From Python Lists

data = [
          ["Alice", 25],
          ["Bob", 30],
          ["Charlie", 35]
      ]
df = pd.DataFrame(data, columns=["Name", "Age"])
print(df)

      Name  Age
0    Alice   25
1      Bob   30
2  Charlie   35


In [None]:
# From Dictionary of Lists

data = {
        "Name": ["Alice", "Bob", "Charlie"],
        "Age": [25, 30, 35]
      }

df = pd.DataFrame(data)
print(df)

      Name  Age
0    Alice   25
1      Bob   30
2  Charlie   35


In [None]:
# From NumPy Arrays

import numpy as np
arr = np.array([[1, 2], [3, 4]])
df = pd.DataFrame(arr, columns=["A", "B"])
print(df)

   A  B
0  1  2
1  3  4


In [None]:
# From CSV Files


df = pd.read_csv("/content/drive/MyDrive/Colab Notebooks/Python - Pandas/CodeWithHarry - Practice/data.csv")
print(df)

    Nirmal  76
0   Tousif  56
1  Parthiv  21
2   Gourav  21
3     Anji  21


In [None]:
# From Excel Files

df = pd.read_excel("data.xlsx")
print(df)

In [None]:
# From JSON

df = pd.read_json("data.json")
print(df)

In [None]:
# From SQL Databases

import sqlite3

conn = sqlite3.connect("mydb.sqlite")
df = pd.read_sql("SELECT * FROM users", conn)

In [None]:
# From the Web (Example: CSV from URL)

url = "https://raw.githubusercontent.com/mwaskom/seaborn-data/master/tips.csv"
df = pd.read_csv(url)
print(df)

     total_bill   tip     sex smoker   day    time  size
0         16.99  1.01  Female     No   Sun  Dinner     2
1         10.34  1.66    Male     No   Sun  Dinner     3
2         21.01  3.50    Male     No   Sun  Dinner     3
3         23.68  3.31    Male     No   Sun  Dinner     2
4         24.59  3.61  Female     No   Sun  Dinner     4
..          ...   ...     ...    ...   ...     ...   ...
239       29.03  5.92    Male     No   Sat  Dinner     3
240       27.18  2.00  Female    Yes   Sat  Dinner     2
241       22.67  2.00    Male    Yes   Sat  Dinner     2
242       17.82  1.75    Male     No   Sat  Dinner     2
243       18.78  3.00  Female     No  Thur  Dinner     2

[244 rows x 7 columns]


## **EDA (Exploratory Data Analysis) on this dataset**

In [None]:
# EDA (Exploratory Data Analysis) on this dataset

url = "https://raw.githubusercontent.com/mwaskom/seaborn-data/master/tips.csv"
df = pd.read_csv(url)

df.head()           #First 5 rows

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.5,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4


In [None]:
df.tail()           #Last 5 rows

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
239,29.03,5.92,Male,No,Sat,Dinner,3
240,27.18,2.0,Female,Yes,Sat,Dinner,2
241,22.67,2.0,Male,Yes,Sat,Dinner,2
242,17.82,1.75,Male,No,Sat,Dinner,2
243,18.78,3.0,Female,No,Thur,Dinner,2


In [None]:
df.info() # Column info: types, non-nulls

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 244 entries, 0 to 243
Data columns (total 7 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   total_bill  244 non-null    float64
 1   tip         244 non-null    float64
 2   sex         244 non-null    object 
 3   smoker      244 non-null    object 
 4   day         244 non-null    object 
 5   time        244 non-null    object 
 6   size        244 non-null    int64  
dtypes: float64(2), int64(1), object(4)
memory usage: 13.5+ KB


In [None]:
df.describe() # Stats for numeric columns

Unnamed: 0,total_bill,tip,size
count,244.0,244.0,244.0
mean,19.785943,2.998279,2.569672
std,8.902412,1.383638,0.9511
min,3.07,1.0,1.0
25%,13.3475,2.0,2.0
50%,17.795,2.9,2.0
75%,24.1275,3.5625,3.0
max,50.81,10.0,6.0


In [None]:
df.columns # List of column names

Index(['total_bill', 'tip', 'sex', 'smoker', 'day', 'time', 'size'], dtype='object')

In [None]:
df.shape # (rows, columns)

(244, 7)

# **Data Selection & Filtering**

## **Data Selection**

In [None]:
# Selecting Columns
import pandas as pd

data = [
          ["Alice", 25, "Singing"],
          ["Bob", 30, "Dancing"],
          ["Charlie", 35, "Coding"]
      ]
df = pd.DataFrame(data, columns=["Name", "Age", "Hobby"])
df["Name"]

Unnamed: 0,Name
0,Alice
1,Bob
2,Charlie


In [None]:
df[["Name", "Hobby"]]

Unnamed: 0,Name,Hobby
0,Alice,Singing
1,Bob,Dancing
2,Charlie,Coding


In [None]:
# Selecting Rows by Index

df.loc[0]           # First row (by label)

Unnamed: 0,0
Name,Alice
Age,25
Hobby,Singing


In [None]:
df.iloc[0]          # First row (by position)

Unnamed: 0,0
Name,Alice
Age,25
Hobby,Singing


In [None]:
# Select Specific Rows and Columns

df.loc[0, "Name"]           # Value at row 0, column 'Name'


'Alice'

In [None]:
df.iloc[0, 1]               # Value at row 0, column at index 1

np.int64(25)

In [None]:
# You can also slice

df.loc[0:2, ["Name", "Age"]]          # Rows 0 to 2, selected columns


Unnamed: 0,Name,Age
0,Alice,25
1,Bob,30
2,Charlie,35


In [None]:
df.iloc[0:2, 0:3]                     # Rows and cols by index position

Unnamed: 0,Name,Age,Hobby
0,Alice,25,Singing
1,Bob,30,Dancing


In [None]:
# Fast Access: .at and .iat
# These are optimized for single element access:


df.at[0, "Name"]          # Fast label-based access



'Alice'

In [None]:
df.iat[0, 1]              # Fast position-based access

np.int64(25)

## **Filtering with Conditions**

In [None]:
# Simple Condition

df[df["Age"] > 30]

Unnamed: 0,Name,Age,Hobby
2,Charlie,35,Coding


In [None]:
# Multiple Conditions (AND / OR)

df[(df["Age"] > 25) & (df["Hobby"] == "Coding")]


Unnamed: 0,Name,Age,Hobby
2,Charlie,35,Coding


In [None]:
df[(df["Name"] == "Bob") | (df["Age"] < 30)]

Unnamed: 0,Name,Age,Hobby
0,Alice,25,Singing
1,Bob,30,Dancing


In [None]:
# Querying with .query()

df.query("Age > 25 and Hobby == 'Coding'")

Unnamed: 0,Name,Age,Hobby
2,Charlie,35,Coding


In [None]:
# Dynamic column names:

col = "Age"
df.query(f"{col} > 25")

Unnamed: 0,Name,Age,Hobby
1,Bob,30,Dancing
2,Charlie,35,Coding


### **Rules for .query()**

In [None]:
# You can use @ to reference Python variables

age_limit = 30
df.query("Age > @age_limit")

Unnamed: 0,Name,Age,Hobby
2,Charlie,35,Coding


In [None]:
# Chained comparisons

df.query("25 < Age <= 40")

Unnamed: 0,Name,Age,Hobby
1,Bob,30,Dancing
2,Charlie,35,Coding


In [None]:
# String values must be in quotes

df.query("Name == 'Bob'")

Unnamed: 0,Name,Age,Hobby
1,Bob,30,Dancing


In [None]:
# Use backticks for column names with spaces or special characters

df.query("`first name` == 'Alice'")

In [None]:
# Logical operators

df.query("age > 30 and city == 'Delhi'")


In [None]:
# Avoid using reserved keywords as column names (use backsticks)

df.query("`class` == 'Physics'")

In [None]:
# Case-sensitive

df.query("City == 'delhi'")       # ❌ if actual value is 'Delhi'

In [None]:
# .query() returns a copy, not a view

filtered = df.query("Age < 20")
print(filtered)

Empty DataFrame
Columns: [Name, Age, Hobby]
Index: []


## **Data Cleaning & Preprocessing**


### **Handling Missing Values**

In [None]:
# Check for Missing Data

df.isnull()           # True for NaNs


Unnamed: 0,Name,Age,Hobby
0,False,False,False
1,False,False,False
2,False,False,False


In [None]:
df.isnull().sum()     # Count missing per column

Unnamed: 0,0
Name,0
Age,0
Hobby,0


In [None]:
# Drop Missing Data

df.dropna()         # Drop rows with *any* missing values


Unnamed: 0,Name,Age,Hobby
0,Alice,25,Singing
1,Bob,30,Dancing
2,Charlie,35,Coding


In [None]:
df.dropna(axis=1)       # Drop columns with missing values

Unnamed: 0,Name,Age,Hobby
0,Alice,25,Singing
1,Bob,30,Dancing
2,Charlie,35,Coding


In [None]:
# Fill Missing Data

df.fillna(0)        # Replace NaN with 0

Unnamed: 0,Name,Age,Hobby
0,Alice,25,Singing
1,Bob,30,Dancing
2,Charlie,35,Coding


In [None]:
df["Age"].fillna(df["Age"].mean())        # Replace with mean

Unnamed: 0,Age
0,25
1,30
2,35


In [None]:
df.ffill()            # Forward fill


Unnamed: 0,Name,Age,Hobby
0,Alice,25,Singing
1,Bob,30,Dancing
2,Charlie,35,Coding


In [None]:
df.bfill()              # Backward fill

Unnamed: 0,Name,Age,Hobby
0,Alice,25,Singing
1,Bob,30,Dancing
2,Charlie,35,Coding


In [None]:
# Detecting & Removing Duplicates

df.duplicated()       # True for duplicates

Unnamed: 0,0
0,False
1,False
2,False


In [None]:
df.drop_duplicates()        # Remove duplicate rows

Unnamed: 0,Name,Age,Hobby
0,Alice,25,Singing
1,Bob,30,Dancing
2,Charlie,35,Coding


In [None]:
# Check based on specific columns

df.duplicated(subset=["Name", "Age"])

Unnamed: 0,0
0,False
1,False
2,False


In [None]:
# String Operations with .str

df["Name"].str.lower()          # Converts all names to lowercase.


Unnamed: 0,Name
0,alice
1,bob
2,charlie


In [None]:
df["Hobby"].str.contains("Singing", case=False)         # Checks if 'singing' is in the Hobby

Unnamed: 0,Hobby
0,True
1,False
2,False


In [None]:
df["Email"].str.split("@")        # Outputs a pandas Series where each element is a list

In [None]:
# Type Conversions with .astype()

df["Age"] = df["Age"].astype(int)
df["Date"] = pd.to_datetime(df["Date"])                           #special case
df["Category"] = df["Category"].astype("category")

In [None]:
# Check data types:

df.dtypes

Unnamed: 0,0
Name,object
Age,int64
Hobby,object


In [None]:
# Applying Functions
# .apply() → Apply any function to rows or columns

df["Age Group"] = df["Age"].apply(lambda x: "Adult" if x >= 18 else "Minor")
print(df)

      Name  Age    Hobby Age Group
0    Alice   25  Singing     Adult
1      Bob   30  Dancing     Adult
2  Charlie   35   Coding     Adult


In [None]:
df['Gender'] = ["Male", "Female", "Male"]
print(df)

      Name  Age    Hobby Age Group  Gender
0    Alice   25  Singing     Adult    Male
1      Bob   30  Dancing     Adult  Female
2  Charlie   35   Coding     Adult    Male


In [None]:
# .map() → Element-wise mapping for Series

gender_map = {"Male": "M", "Female": "F"}
df["Gender"] = df["Gender"].map(gender_map)
print(df)

      Name  Age    Hobby Age Group Gender
0    Alice   25  Singing     Adult    NaN
1      Bob   30  Dancing     Adult    NaN
2  Charlie   35   Coding     Adult    NaN


In [None]:
# .replace() → Replace specific values

df["City"].replace({"Del": "Delhi", "Mum": "Mumbai"})





## **Data Transformation**

In [None]:
# Sorting & Ranking

# Sort by Values

df.sort_values("Age") # Ascending sort

Unnamed: 0,Name,Age,Hobby,Age Group,Gender
0,Alice,25,Singing,Adult,Male
1,Bob,30,Dancing,Adult,Female
2,Charlie,35,Coding,Adult,Male


In [None]:
df.sort_values("Age", ascending=False) # Descending

Unnamed: 0,Name,Age,Hobby,Age Group,Gender
2,Charlie,35,Coding,Adult,Male
1,Bob,30,Dancing,Adult,Female
0,Alice,25,Singing,Adult,Male


In [None]:
df.sort_values(["Age", "Salary"])     # Sort by multiple columns

# If age is tie -> then salary will be checked

In [None]:
# Reset Index


df.reset_index(drop=True, inplace=True) # Reset the index and drop the old index
print(df)

      Name  Age    Hobby Age Group  Gender
0    Alice   25  Singing     Adult    Male
1      Bob   30  Dancing     Adult  Female
2  Charlie   35   Coding     Adult    Male


In [None]:
# Sort by Index

df.sort_index()

In [None]:
# Renaming Columns & Index

df.rename(columns={"oldName": "newName"}, inplace=True)
df.rename(index={0: "row1", 1: "row2"}, inplace=True)

In [None]:
# To rename all columns:

df.columns = ["Name", "Age", "City"]

In [None]:
# Changing Column Order

df = df[["Hobby", "Name", "Age"]] # Reorder as desired
print(df)

     Hobby     Name  Age
0  Singing    Alice   25
1  Dancing      Bob   30
2   Coding  Charlie   35


In [None]:
# You can also move one column to the front:

cols = ["Age"] + [col for col in df.columns if col != "Age"]
df = df[cols]
print(df)

   Age    Hobby     Name
0   25  Singing    Alice
1   30  Dancing      Bob
2   35   Coding  Charlie


## **Reshaping Data using Melt and Pivot**


In [None]:
# melt() — Wide to Long

import pandas as pd

# Sample DataFrame

data = {
          'Name': ['Alice', 'Bob', 'Charlie'],
          'Math': [85, 78, 92],
          'Science': [90, 82, 89],
          'English': [88, 85, 94]
        }
df = pd.DataFrame(data)

# Display the DataFrame
print(df)


      Name  Math  Science  English
0    Alice    85       90       88
1      Bob    78       82       85
2  Charlie    92       89       94


In [None]:
df.melt(id_vars=["Name"], value_vars=["Math", "Science", "English"], var_name="Subject", value_name="Socre")


# id_vars=["Name"] : We keep the “Name” column as it is because its the identifier.
# value_vars=["Math", "Science", "English"] : These are the columns we want to melt.
# var_name="Subject" : The new column containing the names of the subjects.
# value_name="Score" : The new column containing the scores.


Unnamed: 0,Name,Subject,Socre
0,Alice,Math,85
1,Bob,Math,78
2,Charlie,Math,92
3,Alice,Science,90
4,Bob,Science,82
5,Charlie,Science,89
6,Alice,English,88
7,Bob,English,85
8,Charlie,English,94


In [None]:
# Using pivot() to reshape it into wide format:

df.pivot(index="Name", columns="Subject", values="Score")

# Duplicate Entries: If you have multiple rows with the same combination of
# index and columns , pivot() will raise an error. In such cases, you should use
# pivot_table() (which can handle duplicate entries by aggregating them).


In [None]:
# We can use pivot_table() to aggregate values (e.g., taking the mean for duplicate entries):

df.pivot_table(index="Name", columns="Subject", values="Score", aggfunc="mean")

## **Aggregation & Grouping**

In [None]:
# .groupby() Function

df = pd.DataFrame({
                    "Department": ["HR", "HR", "IT", "IT", "Marketing", "Marketing", "Sales", "Sales"],
                    "Team": ["A", "A", "B", "B", "C", "C", "D", "D"],
                    "Gender": ["M", "F", "M", "F", "M", "F", "M", "F"],
                    "Salary": [85, 90, 78, 85, 92, 88, 75, 80],
                    "Age": [23, 25, 30, 22, 28, 26, 21, 27],
                    "JoinDate": pd.to_datetime([
                                                  "2020-01-10", "2020-02-15", "2021-03-20", "2021-04-10",
                                                  "2020-05-30", "2020-06-25", "2021-07-15", "2021-08-01"
                                                ])
                  })

print(df)

  Department Team Gender  Salary  Age   JoinDate
0         HR    A      M      85   23 2020-01-10
1         HR    A      F      90   25 2020-02-15
2         IT    B      M      78   30 2021-03-20
3         IT    B      F      85   22 2021-04-10
4  Marketing    C      M      92   28 2020-05-30
5  Marketing    C      F      88   26 2020-06-25
6      Sales    D      M      75   21 2021-07-15
7      Sales    D      F      80   27 2021-08-01


In [None]:
df.groupby("Department")["Salary"].mean()

Unnamed: 0_level_0,Salary
Department,Unnamed: 1_level_1
HR,87.5
IT,81.5
Marketing,90.0
Sales,77.5


In [None]:
df.groupby("Team")["Salary"].mean() # Average per team

Unnamed: 0_level_0,Salary
Team,Unnamed: 1_level_1
A,87.5
B,81.5
C,90.0
D,77.5


In [None]:
df.groupby("Team")["Salary"].sum() # Total score

Unnamed: 0_level_0,Salary
Team,Unnamed: 1_level_1
A,175
B,163
C,180
D,155


In [None]:
df.groupby("Team")["Salary"].count() # How many entries

Unnamed: 0_level_0,Salary
Team,Unnamed: 1_level_1
A,2
B,2
C,2
D,2


In [None]:
df.groupby("Team")["Salary"].min()

Unnamed: 0_level_0,Salary
Team,Unnamed: 1_level_1
A,85
B,78
C,88
D,75


In [None]:
df.groupby("Team")["Salary"].max()

Unnamed: 0_level_0,Salary
Team,Unnamed: 1_level_1
A,90
B,85
C,92
D,80


In [None]:
# Custom Aggregations with .agg()

df.groupby("Team")["Salary"].agg(["mean", "max", "min"])

Unnamed: 0_level_0,mean,max,min
Team,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
A,87.5,90,85
B,81.5,85,78
C,90.0,92,88
D,77.5,80,75


In [None]:
df.groupby("Team")["Salary"].agg(avg_score="mean", high_score="max")            #avg_score 1 column,  high_score 1 column


Unnamed: 0_level_0,avg_score,high_score
Team,Unnamed: 1_level_1,Unnamed: 2_level_1
A,87.5,90
B,81.5,85
C,90.0,92
D,77.5,80


In [None]:
# Apply different functions to different columns:

df.groupby("Team").agg({
                          "Salary": "mean",
                          "Age": "max"
})


Unnamed: 0_level_0,Salary,Age
Team,Unnamed: 1_level_1,Unnamed: 2_level_1
A,87.5,25
B,81.5,30
C,90.0,28
D,77.5,27


In [None]:
# .transform() Example:

df["Team Avg"] = df.groupby("Team")["Salary"].transform("mean")
print(df)

  Department Team Gender  Salary  Age   JoinDate  Team Avg
0         HR    A      M      85   23 2020-01-10      87.5
1         HR    A      F      90   25 2020-02-15      87.5
2         IT    B      M      78   30 2021-03-20      81.5
3         IT    B      F      85   22 2021-04-10      81.5
4  Marketing    C      M      92   28 2020-05-30      90.0
5  Marketing    C      F      88   26 2020-06-25      90.0
6      Sales    D      M      75   21 2021-07-15      77.5
7      Sales    D      F      80   27 2021-08-01      77.5


In [None]:
# .filter() Example:

df.groupby("Team").filter(lambda x: x["Salary"].mean() > 80)

Unnamed: 0,Department,Team,Gender,Salary,Age,JoinDate,Team Avg
0,HR,A,M,85,23,2020-01-10,87.5
1,HR,A,F,90,25,2020-02-15,87.5
2,IT,B,M,78,30,2021-03-20,81.5
3,IT,B,F,85,22,2021-04-10,81.5
4,Marketing,C,M,92,28,2020-05-30,90.0
5,Marketing,C,F,88,26,2020-06-25,90.0


In [4]:
import pandas as pd

# Merge Like SQL: pd.merge()


# Sample DataFrames

employees = pd.DataFrame({
                            "EmpID": [1, 2, 3],
                            "Name": ["Alice", "Bob", "Charlie"],
                            "DeptID": [10, 20, 30]
                          })

departments = pd.DataFrame({
                            "DeptID": [10, 20, 40],
                            "DeptName": ["HR", "Engineering", "Marketing"]
                          })

In [5]:
print(employees)

   EmpID     Name  DeptID
0      1    Alice      10
1      2      Bob      20
2      3  Charlie      30


In [6]:
print(departments)

   DeptID     DeptName
0      10           HR
1      20  Engineering
2      40    Marketing


In [7]:
# Inner Join (default)

pd.merge(employees, departments, on="DeptID")

Unnamed: 0,EmpID,Name,DeptID,DeptName
0,1,Alice,10,HR
1,2,Bob,20,Engineering


In [8]:
# Left Join

pd.merge(employees, departments, on="DeptID", how="left")

Unnamed: 0,EmpID,Name,DeptID,DeptName
0,1,Alice,10,HR
1,2,Bob,20,Engineering
2,3,Charlie,30,


In [9]:
# Right Join

pd.merge(employees, departments, on="DeptID", how="right")

Unnamed: 0,EmpID,Name,DeptID,DeptName
0,1.0,Alice,10,HR
1,2.0,Bob,20,Engineering
2,,,40,Marketing


In [10]:
# Outer Join

pd.merge(employees, departments, on="DeptID", how="outer")

Unnamed: 0,EmpID,Name,DeptID,DeptName
0,1.0,Alice,10,HR
1,2.0,Bob,20,Engineering
2,3.0,Charlie,30,
3,,,40,Marketing


## **Concatenating DataFrames**


In [12]:
# Use pd.concat() to stack datasets either vertically or horizontally.
# Vertical (rows)


df1 = pd.DataFrame({"Name": ["Alice", "Bob"]})
df2 = pd.DataFrame({"Name": ["Charlie", "David"]})
pd.concat([df1, df2])

Unnamed: 0,Name
0,Alice
1,Bob
0,Charlie
1,David


In [14]:
# Horizontal (columns)

df1 = pd.DataFrame({"ID": [1, 2]})
df2 = pd.DataFrame({"Score": [90, 80, 78]})
pd.concat([df1, df2], axis=1)

Unnamed: 0,ID,Score
0,1.0,90
1,2.0,80
2,,78


## **Reading & Writing Files in Pandas**


### **CSV Files**

In [None]:
# Read CSV

df = pd.read_csv("data.csv")
pd.read_csv("data.csv", usecols=["Name", "Age"], nrows=10)    #only read datas from column Name and Age upto 10th row

# Write CSV

df.to_csv("output.csv", index=False)

### **Excel Files**


In [None]:
# Read Excel

df = pd.read_excel("data.xlsx")
pd.read_excel("data.xlsx", sheet_name="Sales")        #Read entire data but only from sheet name = Sales

# Write Excel

df.to_excel("output.xlsx", index=False)


# Multiple sheets:
with pd.ExcelWriter("report.xlsx") as writer:
  df1.to_excel(writer, sheet_name="Summary", index=False)
  df2.to_excel(writer, sheet_name="Details", index=False)

### **JSON Files**

In [None]:
# Read JSON

df = pd.read_json("data.json")