## Cheatsheet - Data Analyst 

### Library 

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score


### Data Manipulation 

#### 1. Read files

In [None]:
# One reader per file format. All three lines rebind `df`, so if the cell is
# run top to bottom only the result of the last call (the JSON one) is kept;
# use the single line that matches your file.
df = pd.read_csv('file.csv')  # comma-separated text file
df = pd.read_excel('file.xlsx')  # Excel workbook
df = pd.read_json('file.json')  # JSON file

def read_data(file_path):
    """Load a tabular file into a DataFrame, choosing the reader by extension.

    Args:
        file_path: Path (str or path-like) to the file, or anything
            ``pd.read_excel`` accepts.

    Returns:
        pandas.DataFrame with the file's contents.

    The extension check is case-insensitive. ``.csv`` goes to
    ``pd.read_csv`` and ``.json`` to ``pd.read_json``; every other input is
    handed to ``pd.read_excel``, which is what this function always did
    before, so existing Excel callers are unaffected.
    """
    # str() so that pathlib.Path objects (and buffers) can be inspected
    # without assuming they are plain strings.
    name = str(file_path).lower()
    if name.endswith('.csv'):
        return pd.read_csv(file_path)
    if name.endswith('.json'):
        return pd.read_json(file_path)
    return pd.read_excel(file_path)

#### 2. Data Information 

In [None]:
# Quick inspection of a loaded DataFrame.
df.head()  # first rows (5 by default)
df.info()  # prints column names, dtypes and non-null counts
df.describe()  # summary statistics (numeric columns by default)

#### 3. Data Cleaning

In [None]:
# NOTE: the first three calls return a new DataFrame and their result is not
# assigned here, so `df` itself stays unchanged. Assign the result back
# (e.g. `df = df.dropna()`) to keep the cleaned data.
# `value` is not defined in this cell; supply your own fill value.
df.dropna()  # drop rows that contain any missing value
df.fillna(value)  # replace missing values with `value`
df.drop_duplicates()  # drop rows that duplicate an earlier row
df['column'] = df['column'].str.strip()  # strip leading/trailing whitespace

#### 4. Column manipulation 

In [None]:
# Derive, rename and convert columns. Every step is assigned back so that it
# actually changes `df`.
df['new_column'] = df['column1'] + df['column2']  # element-wise combination
# rename() returns a new DataFrame instead of modifying `df` in place, so the
# result has to be assigned or the rename is lost.
df = df.rename(columns={'old_name': 'new_name'})
df['date'] = pd.to_datetime(df['date'])  # datetime conversion

#### 5. Filtering and selection

In [None]:
# Row filtering with a boolean mask, then column selection with a list of
# names. `value` is not defined in this cell; supply your own threshold.
df_filtered = df[df['column'] > value]  # keep rows where the condition is True
df_selected = df[['column1', 'column2']]  # double brackets -> DataFrame with just these columns

#### 6. Aggregation and grouping

In [None]:
# Per-group aggregation: one row per distinct value of 'column', with the sum
# of 'column1' and the mean of 'column2'.
df_grouped = df.groupby('column').agg({'column1': 'sum', 'column2': 'mean'})
# Pivot: 'column1' values become the rows, 'column2' values become the
# columns, and each cell holds the sum of 'column3' for that pair.
df_pivot = df.pivot_table(index='column1', columns='column2', values='column3', aggfunc='sum')

#### 7. Merging and joining

In [None]:
# `df1` and `df2` are not defined in this cell; supply your own DataFrames.
# Inner join: keeps only rows whose 'common_column' value appears in both.
df_merged = pd.merge(df1, df2, on='common_column', how='inner')
# Stack the rows of df2 below df1 (row-wise by default); the original index
# labels are kept.
df_concatenated = pd.concat([df1, df2])