# 


In [None]:
# Pandas Cheat Sheet
import pandas as pd
import numpy as np

#### Data Creation
# Example DataFrame: each dict key becomes a column, each list its values
# (A holds ints, B holds strings, C holds floats)
df = pd.DataFrame(
    data={
        'A': [1, 2, 3],
        'B': ['a', 'b', 'c'],
        'C': [1.0, 2.0, 3.0],
    },
)

# Example Series: the dict keys become the index labels
s = pd.Series({'a': 1, 'b': 2, 'c': 3})

#### Reading/Writing Data
# Load data from disk -- one reader per file format
df_csv = pd.read_csv('file.csv')          # CSV text file
df_excel = pd.read_excel('file.xlsx')     # Excel workbook
df_json = pd.read_json('file.json')       # JSON document

# Save the DataFrame to disk
df.to_csv('output.csv', index=False)      # index column left out
df.to_excel('output.xlsx', index=True)    # index written (the default)
df.to_json('output.json')

#### Basic Information
# Quick inspection of a DataFrame
df.head(n=5)     # Top rows (n defaults to 5)
df.tail(n=5)     # Bottom rows (n defaults to 5)
df.info()        # Prints columns, dtypes and non-null counts
df.describe()    # Summary statistics
df.shape         # (rows, columns) tuple
df.columns       # Column labels
df.dtypes        # dtype of every column

#### Selection and Indexing
# Columns
df['A']                 # One column
df[['A', 'B']]          # Several columns (pass a list of labels)
# Rows
df.loc[0]               # Row whose index label is 0
df.iloc[0]              # Row at integer position 0
# Single values
df.loc[0, 'A']          # By row label and column label
df.iloc[0, 0]           # By row position and column position

# Boolean indexing: index with a True/False mask to keep matching rows
df[df['A'] > 2]
# Combine masks with & and wrap each comparison in parentheses
df[(df['B'] == 'a') & (df['A'] > 2)]

#### Data Cleaning
# Handle missing values (each call returns a new object; df is not modified)
df.isna()               # Boolean mask, True where a value is missing
df.dropna()             # Drop rows that contain a missing value
df.fillna(0)            # Replace missing values with a constant
# fillna(method=...) is deprecated since pandas 2.1; the dedicated
# ffill()/bfill() methods are the supported replacement.
df.ffill()              # Forward fill: carry the last valid value forward
df.bfill()              # Backward fill: use the next valid value

# Remove duplicates
df.drop_duplicates()              # Compare whole rows
df.drop_duplicates(subset=['A'])  # Compare only the listed columns

#### Data Manipulation
# Sort
df.sort_values('A')                     # Sort by column
df.sort_values(['A', 'B'], ascending=[True, False])  # Multiple columns

# Group operations
# numeric_only=True skips non-numeric columns such as the string column 'B';
# without it pandas >= 2.0 raises TypeError when asked for the mean of strings.
df.groupby('A').mean(numeric_only=True)  # Group by and aggregate
df.groupby(['A', 'B']).agg({
    'C': ['mean', 'sum']
})                                      # Multiple aggregations on column C

# Merge and Join
# NOTE(review): df1 and df2 are not defined in this snippet -- they are
# assumed to be existing DataFrames that both have a 'key' column.
df1.merge(right=df2, on='key')         # Combine rows with equal 'key'
df1.join(other=df2)                    # Combine on the index

# Concatenate
pd.concat(objs=[df1, df2], axis=0)     # Stack rows (axis=0 is the default)
pd.concat(objs=[df1, df2], axis=1)     # Place side by side

#### Data Transformation
# Apply functions
df['A'].apply(lambda x: x*2)           # Apply to every element of a column
# Apply to each column: range (max - min). Restricted to numeric columns
# because subtraction is undefined for strings, so a column such as 'B'
# would raise TypeError.
df.select_dtypes(include='number').apply(lambda x: x.max() - x.min())

# String operations (via the .str accessor)
df['B'].str.upper()                    # Convert to uppercase
df['B'].str.contains('a')              # Check if contains
df['B'].str.split(',')                 # Split string on ','

#### Time Series
# Date operations
# NOTE(review): assumes df has a 'date' column of parseable dates.
df['date'] = pd.to_datetime(df['date'])
# Use the .dt accessor while 'date' is still a column: once it becomes the
# index below, df['date'] raises KeyError.
df['date'].dt.year                     # Extract year
df['date'].dt.month                    # Extract month
df.set_index('date', inplace=True)     # resample() needs a datetime-like index
# Monthly resampling. An offset object is used instead of the 'M' alias,
# which is deprecated since pandas 2.2 (the 'ME' replacement does not exist
# before 2.2). numeric_only=True skips non-numeric columns.
df.resample(pd.offsets.MonthEnd()).mean(numeric_only=True)

#### Statistical Operations
# Basic statistics
# numeric_only=True restricts each reduction to numeric columns. Since
# pandas 2.0 the default is False, so a string column such as 'B' makes these
# calls raise TypeError/ValueError. (corr/cov accept numeric_only from pandas 1.5.)
df.mean(numeric_only=True)
df.median(numeric_only=True)
df.std(numeric_only=True)
df.corr(numeric_only=True)             # Correlation matrix
df.cov(numeric_only=True)              # Covariance matrix

#### Reshaping Data
# Pivot: values of A become the index, values of B the columns, C the cells
df_pivot = df.pivot(values='C', index='A', columns='B')
# Melt: keep A as identifier, unpivot B and C into variable/value rows
df_melted = pd.melt(frame=df, id_vars=['A'], value_vars=['B', 'C'])

# Pivot tables: like pivot, but cells are aggregated with aggfunc
df.pivot_table(
    index='A',
    columns='B',
    values='C',
    aggfunc='mean',
)

#### Advanced Operations
# Window functions (results are stored as new columns on df)
df['rolling_mean'] = df['A'].rolling(3).mean()  # Mean over a 3-row window
df['cumsum'] = df['A'].cumsum()                 # Running total of A

# Category operations
df['B'] = df['B'].astype('category')   # Replace B with a categorical column
df['B'].cat.codes                      # Integer code of each category

#### Performance Tips
# Memory usage
df.memory_usage(index=True, deep=True)  # Per-column bytes; deep=True inspects object contents
df.select_dtypes(include='float')       # Keep only float columns

# Efficient operations (both take an expression string)
df.eval('D = A + C')                    # Returns a copy with new column D
df.query('A > 2')                       # Rows where the expression is true