# Notebook cell In [2]
# Pandas Data Manipulation & Analysis Cheat Sheet
import pandas as pd
import numpy as np

# Creating a DataFrame
# Keyword names become the column labels: A and B hold integers, C holds text.
df = pd.DataFrame(
    dict(
        A=[1, 2, 3],
        B=[4, 5, 6],
        C=["x", "y", "z"],
    )
)

# Reading & Writing Data
# Each read rebinds `df`, so after this section `df` is the frame loaded from
# file.json. index=False keeps the row index out of the CSV/Excel output;
# without it, reading the file back yields an extra "Unnamed: 0" column.
df = pd.read_csv("file.csv")             # Read CSV
df.to_csv("output.csv", index=False)     # Write CSV (no index column)

df = pd.read_excel("file.xlsx")          # Read Excel
df.to_excel("output.xlsx", index=False)  # Write Excel (no index column, same as the CSV)

df = pd.read_json("file.json")           # Read JSON
df.to_json("output.json")                # Write JSON

# Viewing Data
df.head(n=5)   # Top of the frame (5 rows is the default)
df.tail(n=3)   # Bottom 3 rows
df.info()      # Print a summary of the DataFrame
df.describe()  # Descriptive statistics

# Selecting Data
df.loc[:, "A"]          # One column, selected by label
df.loc[:, ["A", "B"]]   # Several columns, selected by label
df.iloc[0, :]           # First row, selected by position
df.loc[0, "A"]          # Single value: row label 0, column "A"

# Filtering Data
df.loc[df["A"].gt(1)]     # Boolean mask: keep rows where A is greater than 1
df.loc[df["C"].eq("y")]   # Boolean mask: keep rows where C equals 'y'
df.query("A > 1")         # The same A filter expressed as a query string

# Sorting Data
df.sort_values(by="A", ascending=False)  # Order rows by column A, largest first
df.sort_index(axis=0)                    # Order rows by their index labels

# Handling Missing Data
# None of these results is assigned, so `df` itself is left as it was.
df.dropna(axis=0)                      # Drop every row that contains a NaN
df.fillna(value=0)                     # Substitute 0 for each NaN
df["A"].fillna(value=df["A"].mean())   # Substitute column A's mean for its NaNs

# Adding & Removing Columns
df["D"] = df["A"].mul(2)             # Derived column: every value of A doubled
df.drop(columns="D", inplace=True)   # Drop column D again, mutating df in place

# Grouping & Aggregation
df.groupby(by="C")["A"].mean()                           # Mean of A within each C group
df.groupby(by="C").aggregate({"A": "sum", "B": "mean"})  # Per group: sum of A, mean of B

# Pivot Tables
pd.pivot_table(df, values="A", index="C", aggfunc="sum")  # One row per C value, summing A

# Merging & Joining
# Two small frames that share the key column "ID" (IDs 1 and 2 are in both).
df1 = pd.DataFrame(dict(ID=[1, 2, 3], Name=["A", "B", "C"]))
df2 = pd.DataFrame(dict(ID=[1, 2, 4], Score=[90, 85, 95]))

merged = pd.merge(df1, df2, on="ID", how="inner")  # Keep only IDs present in both frames
outer = pd.merge(df1, df2, on="ID", how="outer")   # Keep every ID; missing cells become NaN

# Concatenation
# df_concat is bound twice, so only the side-by-side result remains afterwards.
df_concat = pd.concat([df1, df2])                  # Default axis=0: df2's rows below df1's
df_concat = pd.concat([df1, df2], axis="columns")  # Frames placed side by side

# Apply Functions
df["A"].apply(lambda x: x * 2)  # Apply function to each value of column A

# Element-wise apply over the whole frame. DataFrame.applymap has been
# deprecated since pandas 2.1 in favour of DataFrame.map, so use map when this
# pandas version provides it and fall back to applymap on older releases.
# The check is on the class so a column named "map" cannot be mistaken for it.
elementwise = df.map if hasattr(pd.DataFrame, "map") else df.applymap
elementwise(lambda x: str(x) + "!")  # Apply to all elements

# String Operations
df["C"].str.upper()             # Upper-cased copy of column C
df["C"].str.contains(pat="x")   # Boolean per row: does the value contain 'x'?

# DateTime Handling
# NOTE(review): assumes df has a "Date" column; the sample frame built at the
# top of this file only has A, B and C, so confirm against the data loaded.
df["Date"] = pd.to_datetime(arg=df["Date"])  # Parse the column into datetimes
df["Year"] = df["Date"].dt.year              # Year component of each date

# Reshaping Data (melt = unpivot)
pd.melt(df, id_vars=["C"], value_vars=["A", "B"])  # Wide to long: A and B become rows keyed by C

# Window Functions (Rolling & Expanding)
df["Rolling_Avg"] = df["A"].rolling(2).mean()  # Mean of each row and the one before it

# Exporting Data
# index=False keeps the row index out of the CSV/Excel files, as the earlier
# to_csv call in this file already does; without it, reading the file back
# yields an extra "Unnamed: 0" column.
df.to_csv("output.csv", index=False)     # Save to CSV
df.to_excel("output.xlsx", index=False)  # Save to Excel
df.to_json("output.json")                # Save to JSON

