Series

In [9]:
import pandas as pd
# A Series is a one-dimensional labeled array that can hold any data type.
# Here three integers are paired with the string labels 'a', 'b' and 'c'.
data = [10, 20, 30]
labels = ['a', 'b', 'c']
s = pd.Series(data=data, index=labels)
print(s)

a    10
b    20
c    30
dtype: int64


DataFrame

In [19]:
# A DataFrame is a two-dimensional labeled structure organised in columns.
# Each dict key becomes a column name and each list supplies that column's
# values; the rows get the default integer index.
data = {
    'Name': ['Alice', 'Bob'],
    'Age': [25, 30],
}
df = pd.DataFrame.from_dict(data)
print(df)

    Name  Age
0  Alice   25
1    Bob   30


Reading and Writing Data

In [None]:
# The loaded frames get their own names instead of being assigned to `df`,
# so the example DataFrame built above is still available to the cells below
# (their recorded outputs show that two-row Name/Age frame).
# NOTE(review): assumes 'file.csv' and 'file.xlsx' exist in the working
# directory -- no visible cell creates them.

# CSV: read a file, then write the same data back out without the index column.
csv_df = pd.read_csv('file.csv')

csv_df.to_csv('output.csv', index=False)

# Excel: read a single worksheet, selected by name.
excel_df = pd.read_excel('file.xlsx', sheet_name='Sheet1')



DataFrame Operations

In [21]:
# Quick inspection helpers. In a notebook only the cell's final expression is
# rendered automatically, so head() and tail() are evaluated here but their
# results are not shown; info() prints its report itself.
df.head(n=3)   # first 3 rows
df.tail(n=3)   # last 3 rows
df.info()      # index, columns, non-null counts, dtypes, memory usage
df.describe()  # summary statistics for the numeric columns


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2 entries, 0 to 1
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Name    2 non-null      object
 1   Age     2 non-null      int64 
dtypes: int64(1), object(1)
memory usage: 164.0+ bytes


Unnamed: 0,Age
count,2.0
mean,27.5
std,3.535534
min,25.0
25%,26.25
50%,27.5
75%,28.75
max,30.0


In [22]:
# Column selection
df.loc[:, 'Name']           # one column -> Series
df.loc[:, ['Name', 'Age']]  # list of columns -> DataFrame

# Row filtering with a boolean mask: keep rows whose Age exceeds 25.
# This is the cell's last expression, so it is what the notebook displays.
older_than_25 = df['Age'] > 25
df[older_than_25]


Unnamed: 0,Name,Age
1,Bob,30


In [None]:
# Add column
df['Salary'] = [50000, 60000]

# Drop column
# drop() returns a new DataFrame by default. The result is bound to a new name
# rather than applied in place, because the grouping and pivot cells further
# down still read df['Salary'].
df_without_salary = df.drop(columns='Salary')

# Drop rows
# Same approach: `df` keeps both rows, matching the recorded groupby output
# below, which lists Age 25 and Age 30.
df_without_first_row = df.drop(index=0)  # Drop row with index 0


Handling Missing Data

In [24]:
# Count the missing values in each column (isna() is the same check as isnull()).
df.isna().sum()

# Replace every NaN with 0, modifying df in place.
df.fillna(value=0, inplace=True)

# Remove any row that still contains a missing value, again in place.
df.dropna(inplace=True)

Grouping and Aggregations

In [25]:
# Grouping
grouped = df.groupby('Age')
print(grouped['Salary'].mean())

# Aggregation
df.groupby('Age').agg({'Salary': ['mean', 'max']})

Age
25    50000.0
30    60000.0
Name: Salary, dtype: float64


Unnamed: 0_level_0,Salary,Salary
Unnamed: 0_level_1,mean,max
Age,Unnamed: 1_level_2,Unnamed: 2_level_2
25,50000.0,50000
30,60000.0,60000


Merging, Joining, and Concatenating

In [26]:
# Concatenate: stack two frames with identical columns on top of each other.
# The original row labels are kept, so the result's index is 0, 1, 0, 1.
df1 = pd.DataFrame({'A': [1, 2], 'B': [3, 4]})
df2 = pd.DataFrame({'A': [5, 6], 'B': [7, 8]})
frames = [df1, df2]
result = pd.concat(frames)

# Merge: join two frames on their shared 'ID' key column.
df1 = pd.DataFrame({'ID': [1, 2], 'Name': ['Alice', 'Bob']})
df2 = pd.DataFrame({'ID': [1, 2], 'Salary': [50000, 60000]})
merged = df1.merge(df2, on='ID')

Sorting

In [27]:
# Put the rows in index-label order, modifying df in place.
df.sort_index(inplace=True)

# Then order the rows by Age, largest first, again in place.
df.sort_values('Age', ascending=False, inplace=True)

Apply Functions

In [None]:
# Element-wise apply on one column: every Age value is doubled and written
# back to the same column.
df['Age'] = df['Age'].apply(lambda age: age * 2)

# Row-wise apply (axis=1): the function receives each row and builds a new
# value from that row's 'Name' field.
df['FullName'] = df.apply(lambda row: row['Name'] + ' Doe', axis=1)

Dates

In [None]:
# No visible cell adds a 'Date' column to the example frame, so indexing
# df['Date'] directly would raise KeyError. Seed one with consecutive daily
# date strings when it is missing; an existing 'Date' column is left untouched.
if 'Date' not in df.columns:
    df['Date'] = (
        pd.date_range('2024-01-01', periods=len(df), freq='D')
        .strftime('%Y-%m-%d')
        .tolist()
    )

# Parse the column to datetime so the .dt accessor can be used on it.
df['Date'] = pd.to_datetime(df['Date'])

# Extract calendar components into their own columns.
df['Year'] = df['Date'].dt.year
df['Month'] = df['Date'].dt.month

Pivot

In [None]:
# Reshape into a table with one row per Name and one column per Age, where
# each cell holds the sum of the Salary values for that combination.
pivot = df.pivot_table(
    values='Salary',
    index='Name',
    columns='Age',
    aggfunc='sum',
)

Visualization

In [None]:
# Bar chart of the Age column: one bar per row, labelled by the row index.
df['Age'].plot.bar()


Exporting to Other Formats

In [None]:
# Excel workbook, written without the index column.
df.to_excel(excel_writer='output.xlsx', index=False)

# JSON, written as a list of records (one object per row).
df.to_json(path_or_buf='output.json', orient='records')