<a href="https://colab.research.google.com/github/Arpitapal30/cheat_sheet_of_pandas/blob/main/Untitled4.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 1. Importing Pandas

In [None]:
import pandas as pd


# 2. Creating Data
- Create a Series
- Create a DataFrame
- Read Data
- Write Data


In [None]:
# Create a Series: a one-dimensional labelled array of values.
# `name` labels the Series itself (here "numbers").
data = pd.Series(data=[1, 2, 3, 4], name="numbers")


In [None]:
# Create a DataFrame from row records: each tuple is one row and
# `columns` supplies the column labels in the same order.
data = pd.DataFrame(
    [
        ("Alice", 25, "New York"),
        ("Bob", 30, "San Francisco"),
        ("Charlie", 35, "Los Angeles"),
    ],
    columns=["Name", "Age", "City"],
)


In [None]:
# Read Data: one reader per file format. Every call rebinds `df`, so after
# this cell runs `df` holds the frame returned by the last reader.
df = pd.read_csv(filepath_or_buffer="file.csv")   # From a CSV file
df = pd.read_excel(io="file.xlsx")                # From an Excel file
df = pd.read_json(path_or_buf="file.json")        # From a JSON file


In [None]:
# Write Data: save `df` to disk in three formats.
# CSV file
df.to_csv(path_or_buf="file.csv", index=False)
# Excel file
df.to_excel(excel_writer="file.xlsx", index=False)
# JSON file
df.to_json(path_or_buf="file.json", orient="records")


# 3. Viewing Data

In [None]:
# Quick inspection helpers for a DataFrame.
# The row count is passed as a literal so the cell runs on a fresh kernel;
# change 5 to show a different number of rows.
df.head(5)        # First 5 rows (5 is also the default when omitted)
df.tail(5)        # Last 5 rows (5 is also the default when omitted)
df.info()         # Summary of DataFrame
df.describe()     # Descriptive stats (numerical columns)
df.shape          # Rows and columns
df.columns        # Column names
df.index          # Index labels


# 4. Selecting Data
- Select Columns
- Select Rows
- Filter Rows

In [None]:
# Select Columns with the label-based indexer: `:` keeps every row,
# the second position names the column(s) to keep.
df.loc[:, "column_name"]        # Single column
df.loc[:, ["col1", "col2"]]     # Multiple columns


In [None]:
# Select Rows: `.iloc` indexes by integer position, `.loc` by index label.
# The row and column indexers are both written out explicitly.
df.iloc[0, :]                    # Row at position 0 (the first row)
df.loc[0, :]                     # Row whose index label is 0
df.loc[slice(0, 2), "col1"]      # Label range 0 through 2 of column "col1"


In [None]:
# Filter Rows: build a boolean mask with the comparison methods and
# index the frame with it to keep only the matching rows.
df[df["Age"].gt(30)]             # Rows where Age > 30
df[df["City"].eq("New York")]    # Rows where City = "New York"


# 5. Adding/Modifying Columns

In [None]:
# Assigning to df[<label>] creates the column, or overwrites it if it exists.
df["New_Column"] = 2 * df["Age"]    # Column derived from an existing one
# Label each row by applying a per-value rule to Age.
df["Category"] = df["Age"].map(
    lambda age: "Adult" if age > 18 else "Minor"
)


# 6. Dropping Data

In [None]:
# Drop data by rebinding `df` to the returned frame instead of mutating it
# in place; `columns=` / `index=` state which axis is targeted.
df = df.drop(columns="column_name")   # Drop column
df = df.drop(index=[0, 1])            # Drop rows with index labels 0 and 1


# 7. Sorting

In [None]:
# Sort by rebinding `df` to the sorted result rather than using inplace=True.
# The second call re-orders the result of the first.
df = df.sort_values(by="Age", ascending=True)   # Sort by column
df = df.sort_index(ascending=False)             # Sort by index


# 8. Aggregations



In [None]:
# Column-level aggregations on Age.
df["Age"].mean()     # Mean of Age
df["Age"].sum()      # Sum of Age
df["Age"].max()      # Maximum Age
df["Age"].min()      # Minimum Age
# numeric_only=True restricts the mean to numeric columns; without it,
# pandas >= 2.0 raises TypeError when the frame also has text columns.
df.groupby("City").mean(numeric_only=True)  # Group by City and calculate mean


# 9. Handling Missing Data

In [None]:
# Inspect and handle missing values. The results are rebound to `df`
# instead of using inplace=True.
df.isnull().sum()     # Count missing values per column
df = df.dropna()      # Drop rows with missing values
# Alternative strategy: once the rows above are dropped there is nothing
# left to fill, so in practice use either dropna or fillna.
df = df.fillna(0)     # Fill missing values with 0


# 10. Merging/Joining DataFrames
- Concatenate
- Merge


In [None]:
# Concatenate two frames; the axis is given by name rather than number.
# `result` is rebound by the second call.
result = pd.concat([df1, df2], axis="index")    # Vertical concat (rows)
result = pd.concat([df1, df2], axis="columns")  # Horizontal concat (columns)


In [None]:
# Merge: inner join of df1 (left) with df2 (right) on the "key" column,
# written with the DataFrame method form.
result = df1.merge(df2, on="key", how="inner")

# 11. Pivot Table

In [None]:
# Pivot table: mean Age, one row per City and one column per Gender.
pd.pivot_table(
    df,
    values="Age",
    index="City",
    columns="Gender",
    aggfunc="mean",
)


# 12. Exporting Data

In [None]:
# Export `df` to disk; each writer receives its target path by keyword.
df.to_csv(path_or_buf="output.csv", index=False)          # Export to CSV
df.to_excel(excel_writer="output.xlsx", index=False)      # Export to Excel
df.to_json(path_or_buf="output.json", orient="records")   # Export to JSON


# 13. Other Useful Functions


In [None]:
# Assorted helpers for exploring a DataFrame.
df.sample(n=5)        # Random sample of rows
df.nunique()          # Number of unique values per column
df.duplicated()       # Check for duplicate rows
# numeric_only=True keeps only numeric columns; without it, pandas >= 2.0
# raises when the frame contains text columns.
df.corr(numeric_only=True)  # Correlation between numeric columns
df.memory_usage()     # Memory usage of DataFrame
