<a href="https://colab.research.google.com/github/Brenda01234/Generative-AI/blob/main/Pandas.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd

**Core Data Structures**

In [None]:
import pandas as pd
import numpy as np

# A Series is a one-dimensional labelled array; the np.nan entry shows how a
# missing value is stored.
s = pd.Series(data=[1, 3, 5, np.nan, 6, 8])
print(s)

# A DataFrame is a two-dimensional labelled table (think spreadsheet): every
# key of the mapping becomes a column and its list supplies that column's rows.
data = dict(
    Name=['Alice', 'Bob', 'Charlie'],
    Age=[25, 30, 35],
    City=['New York', 'London', 'Tokyo'],
)
df = pd.DataFrame(data=data)
print(df)

0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64
      Name  Age      City
0    Alice   25  New York
1      Bob   30    London
2  Charlie   35     Tokyo


**Data Import/Export**

In [None]:
# Read from various formats; each call returns a new DataFrame.
# NOTE(review): none of the input files named below, nor `connection`, is
# created by a cell visible in this notebook - they must exist before this
# cell is run, otherwise it stops at the first missing one.
df_csv = pd.read_csv('data.csv')
df_excel = pd.read_excel('data.xlsx')
df_json = pd.read_json('data.json')
# NOTE(review): `connection` is presumably an open database connection or
# engine, and `table` a placeholder for a real table name - confirm both.
df_sql = pd.read_sql('SELECT * FROM table', connection)

# Write to various formats: the current `df` is saved once per format.
df.to_csv('output.csv')
df.to_excel('output.xlsx')
df.to_json('output.json')

**Data Exploration and Inspection**

In [None]:
# Basic information
# A notebook renders only the last bare expression of a cell, so when this
# cell runs as a whole the visible output is what df.info() prints plus the
# value of df.index. Run a line in its own cell to see its result.
df.head()          # First 5 rows (all rows if the frame is shorter)
df.tail()          # Last 5 rows (all rows if the frame is shorter)
df.info()          # Prints data types and memory usage
df.describe()      # Statistical summary
df.shape           # (rows, columns)
df.columns         # Column names
df.index           # Index/row labels

**Data Selection and Indexing**

In [None]:
# Column selection
df['Age']          # Single column (Series)
df[['Name', 'Age']]  # Multiple columns (DataFrame) - note the list inside the brackets

# Row selection
df.iloc[0]         # By integer position: the first row
df.loc[0]          # By index label: the row labelled 0 (same row here, since the labels are 0, 1, 2)
df[df['Age'] > 30] # Boolean indexing: keep the rows where the condition is True

# Slicing (iloc slices exclude the end position)
df.iloc[0:3]       # Rows at positions 0-2
df.iloc[0:3, 1:3]  # Rows at positions 0-2, columns at positions 1-2

**Data Manipulation**

In [None]:
# Adding/removing columns
# Derive a column from an existing one, then remove it again. The drop targets
# the column created on the line above: asking drop() for a label that is not
# in the frame raises KeyError and would abort the rest of this cell.
df['New_Column'] = df['Age'] * 2
df = df.drop(columns='New_Column')

# Sorting: returns a new frame ordered by Age, largest first; df is not modified.
df.sort_values('Age', ascending=False)

# Grouping and aggregation
df.groupby('City')['Age'].mean()                         # Mean Age per City
df.groupby('City').agg({'Age': ['mean', 'min', 'max']})  # Several Age statistics per City

# Pivot tables: one row per City, one column per Name, cells hold Age.
pd.pivot_table(df, values='Age', index='City', columns='Name')

**Merging and Joining**

In [None]:
# Two small example frames that share a 'key' column. They are built in this
# cell because no earlier cell defines df1/df2; without them a fresh
# "Restart & Run All" stops here with NameError.
df1 = pd.DataFrame({
    'key': ['k0', 'k1', 'k2', 'k3'],
    'A': ['A0', 'A1', 'A2', 'A3'],
    'B': ['B0', 'B1', 'B2', 'B3'],
})
df2 = pd.DataFrame({
    'key': ['k0', 'k1', 'k2', 'k3'],
    'C': ['C0', 'C1', 'C2', 'C3'],
    'D': ['D0', 'D1', 'D2', 'D3'],
})

# Concatenation
pd.concat([df1, df2], axis=0)  # Vertical: rows of df2 go below the rows of df1
pd.concat([df1, df2], axis=1)  # Horizontal: columns of df2 go to the right of df1

# Merging (SQL-like joins)
pd.merge(df1, df2, on='key', how='inner')  # Keys present in both frames
pd.merge(df1, df2, on='key', how='outer')  # Keys present in either frame
pd.merge(df1, df2, on='key', how='left')   # Every key of df1
pd.merge(df1, df2, on='key', how='right')  # Every key of df2

**Time Series**

In [None]:
# Date handling: six consecutive daily timestamps starting on 2024-01-01.
dates = pd.date_range('2024-01-01', periods=6, freq='D')

# Seeded generator so the random values are the same on every run.
rng = np.random.default_rng(42)

# The time-indexed frame gets its own name. Rebinding `df` here would throw
# away the Name/Age/City frame, which the next section still reads by column
# name (df['Name'], df['City']).
ts_df = pd.DataFrame(rng.standard_normal((6, 4)), index=dates, columns=list('ABCD'))

# Time-based operations
# The month-end rule is passed as an offset object rather than the string
# alias 'M', which pandas 2.2 deprecated in favour of 'ME'; the object form
# is accepted by both older and newer releases.
ts_df.resample(pd.offsets.MonthEnd()).mean()  # Monthly resampling
ts_df.rolling(window=3).mean()  # Rolling window over 3 consecutive rows
ts_df.shift(1)              # Shift data down by one row

**Advanced Features**

In [None]:
# Vectorized string operations (these need `df` to have a 'Name' column)
df['Name'].str.upper()          # Upper-case every name
df['Name'].str.contains('Ali')  # Boolean mask: True where the name contains 'Ali'

# Categorical data: store 'City' with the category dtype
df['City'] = df['City'].astype('category')

# MultiIndex: two parallel label arrays become a two-level row index.
# From here on `df` refers to this new four-row frame.
arrays = [['A', 'A', 'B', 'B'], [1, 2, 1, 2]]
index = pd.MultiIndex.from_arrays(arrays, names=['first', 'second'])
df = pd.DataFrame({'data': [1, 2, 3, 4]}, index=index)

# Function application
df.apply(lambda x: x*2)  # Apply to each column
# Element-wise application. DataFrame.applymap is deprecated since pandas 2.1
# (renamed to DataFrame.map); mapping over each column with Series.map gives
# the same result on every pandas version.
df.apply(lambda col: col.map(lambda x: x*2))  # Apply to each element

In [1]:
import pandas as pd


In [3]:
# Merging and joining: build two frames that share a 'key' column.
data = dict(
    key=['k0', 'k1', 'k2', 'k3'],
    A=['A0', 'A1', 'A2', 'A3'],
    B=['B0', 'B1', 'B2', 'B3'],
)
df1 = pd.DataFrame(data)

df2 = pd.DataFrame(
    {
        'key': ['k0', 'k1', 'k2', 'k3'],
        'C': ['C0', 'C1', 'C2', 'C3'],
        'D': ['D0', 'D1', 'D2', 'D3'],
    }
)

# Inner join of the two frames on 'key'; the result is the cell's output.
df = df1.merge(df2, how='inner', on='key')
df

Unnamed: 0,key,A,B,C,D
0,k0,A0,B0,C0,D0
1,k1,A1,B1,C1,D1
2,k2,A2,B2,C2,D2
3,k3,A3,B3,C3,D3


In [6]:
# Concatenate the two frames side by side. Nothing is joined on 'key': the
# columns of df2 are simply placed to the right of df1, so 'key' shows up
# twice in the result.
pd.concat(objs=[df1, df2], axis='columns')

Unnamed: 0,key,A,B,C,D
0,k0,A0,B0,,
1,k1,A1,B1,,
2,k2,A2,B2,,
3,k3,A3,B3,,
0,k0,,,C0,D0
1,k1,,,C1,D1
2,k2,,,C2,D2
3,k3,,,C3,D3


Unnamed: 0,key,A,B,key.1,C,D
0,k0,A0,B0,k0,C0,D0
1,k1,A1,B1,k1,C1,D1
2,k2,A2,B2,k2,C2,D2
3,k3,A3,B3,k3,C3,D3


In [7]:
# Join df1 and df2 on their shared 'key' column. With `how` left out the
# result has the same rows as the explicit inner merge shown earlier.
pd.merge(left=df1, right=df2, on="key")

Unnamed: 0,key,A,B,C,D
0,k0,A0,B0,C0,D0
1,k1,A1,B1,C1,D1
2,k2,A2,B2,C2,D2
3,k3,A3,B3,C3,D3
