# Pandas Commands

In [19]:
import pandas as pd
import numpy as np

#### 1. Setup & Inspection

```python
import pandas as pd

df = pd.read_csv("file.csv")       # Load CSV
df.head()                          # First 5 rows
df.tail(10)                        # Last 10 rows
df.info()                          # Column types & nulls
df.describe()                      # Summary statistics
df.shape                           # (rows, columns)
```

In [25]:
# Small demo frame: four rows, three integer columns (A, B, C).
df = pd.DataFrame(
    dict(
        A=[1, 2, 3, 4],
        B=[5, 6, 7, 8],
        C=[9, 10, 11, 12],
    )
)

In [27]:
# Show the dtype pandas inferred for each column of df.
df.dtypes

A    int64
B    int64
C    int64
dtype: object

In [11]:
# Display the whole frame; it has only four rows, so no .head() is needed.
df

Unnamed: 0,A,B,C
0,1,5,9
1,2,6,10
2,3,7,11
3,4,8,12


#### 2. Selection & Indexing

```python
df["col"]                          # Single column
df[["col1", "col2"]]               # Multiple columns
df.loc[0, "col"]                   # Row + column by labels
df.iloc[0, 2]                      # Row + column by integer position
df[df["col"] > 10]                 # Conditional filtering
```


In [22]:
# Selection returns pandas objects, not NumPy arrays: df['col'] gives a Series,
# while .loc/.iloc with a single row and a single column give a scalar.
# Call .to_numpy() (or .values) on a Series to get the underlying NumPy array.

# df['A']
# df.loc[2, 'B']  # Row + column by labels
# df.iloc[3, 2] # Row + column by position

# value = df.iloc[2, 2]
# print(value)

array = df['B']  # despite the name, this is a pandas Series (see the printed output)
print(array)

0    5
1    6
2    7
3    8
Name: B, dtype: int64


#### 3. Sorting & Renaming

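```python
df.sort_values("col", ascending=False)    # Sort by column
df.rename(columns={"old":"new"}, inplace=True)    # Rename a column
df.reset_index(drop=True, inplace=True)    # Renumber rows 0..n-1, discarding the old index

df['A'] = df['A'].astype('float')    # Convert a column to another dtype
```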

#### 4. Handling Missing Data

```python
df.isna().sum()                    # Count missing values per column
df.dropna()                        # Drop rows with NaN
df.fillna(0)                       # Fill with 0
df.fillna(df["col"].mean())        # Fill every NaN with the mean of "col"
```

In [None]:
# df.dropna(inplace=True)  # Drop rows with any NaN values
# df.fillna(0, inplace=True)  # Fill NaN values with 0

# df['B'].replace(np.nan,df['B'].mean(),inplace=True)  # Replace NaN with column mean
# NOTE: the line above calls an in-place method on the selected column df['B'];
# with pandas Copy-on-Write this may leave df itself unchanged. Assigning the
# result back is the reliable form:
# df['B'] = df['B'].fillna(df['B'].mean())

#### 5. Aggregation & Grouping

```python
df["col"].value_counts()           # Frequency count
df.groupby("category")["value"].mean()    # Mean of "value" per category
df.pivot_table(values="val", index="col1", columns="col2", aggfunc="mean")    # Mean of "val" for each col1/col2 pair
```

#### 6. Adding & Modifying Columns

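```python
df["new"] = df["col1"] + df["col2"]    # New column from two existing ones
df["col"].apply(lambda x: x**2)        # Returns a new Series; df is unchanged
df.assign(ratio=df["a"]/df["b"])       # Returns a new DataFrame with a "ratio" column
```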


In [28]:
# Conditional Selection
# A boolean mask inside df[...] keeps only the rows where the mask is True.
# filtered_df = df[df['A'] > 2]
# Combine conditions with & / | and wrap each one in parentheses, because
# these operators bind more tightly than comparisons such as > and <.
# filtered_df_2 = df[(df['A'] > 1) & (df['B'] < 8)]

#### 7. Merging & Joining

```python
pd.concat([df1, df2])              # Stack vertically
pd.merge(df1, df2, on="key")       # SQL-style join (inner by default)
```

#### 8. Exporting

```python
df.to_csv("output.csv", index=False)       # Write CSV without the index column
df.to_excel("output.xlsx", index=False)    # Requires an Excel engine such as openpyxl
```

In [None]:
# Plots
# DataFrame.plot draws with matplotlib by default, so matplotlib must be installed.

# df.plot(x='A', y='B', kind='scatter')  # Scatter plot
# df.plot(x='A', y='B', kind='line')  # Line plot
# df.plot(x='A', y='B', kind='bar')  # Bar plot
# df.plot(x='A', y='B', kind='barh')  # Horizontal bar plot
# df.plot(x='A', y='B', kind='hist')  # Histogram
# df.plot(x='A', y='B', kind='box')  # Box plot
# df.plot(x='A', y='B', kind='area')  # Area plot
# NOTE(review): for 'hist' and 'box' the x argument does not look meaningful
# (only the distribution of y is drawn) — confirm before relying on it.


