# 1. Importing Pandas

In [None]:
import pandas as pd

# 2. Creating Data Structures

### a. Series

In [None]:

# Build a Series (one-dimensional labelled array) from a plain Python list;
# with no index given, the rows are labelled 0..4.
odd_values = [1, 3, 5, 7, 9]
s = pd.Series(odd_values)
print(s)


## b. DataFrame

### A two-dimensional, tabular data structure.

In [None]:
# Build a DataFrame column by column: every key becomes a column name and
# every list holds that column's values, one entry per row.
data = dict(
    Name=['Alice', 'Bob', 'Charlie'],
    Age=[25, 30, 35],
    City=['New York', 'Los Angeles', 'Chicago'],
)

df = pd.DataFrame(data)
print(df)


## 3. Reading and Writing Data

### a. Reading from CSV

In [None]:
# Load 'data.csv' into a DataFrame, then preview the top of it
# (head() returns the first 5 rows by default).
df = pd.read_csv('data.csv')
preview = df.head()
print(preview)


### b. Writing to CSV

In [None]:
# Write the DataFrame to 'output.csv'; index=False keeps the row index out of the file.
df.to_csv(path_or_buf='output.csv', index=False)


# 4. Viewing Data

In [None]:
# Quick ways to inspect a DataFrame.
print(df.head())       # First 5 rows
print(df.tail())       # Last 5 rows
print(df.shape)        # (rows, columns) tuple
df.info()              # Summary of DataFrame; info() prints it itself and returns None,
                       # so wrapping it in print() would emit a stray "None"
print(df.describe())   # Statistical summary


# 5. Selecting Data

### a. Selecting Columns

In [None]:
# Single column: indexing with one column name returns a Series
name_column = df['Name']
print(name_column)

# Multiple columns: indexing with a list of names returns a DataFrame
name_and_age = df[['Name', 'Age']]
print(name_and_age)


### b. Selecting Rows

In [None]:
# By index position: iloc[0] is the first row, whatever its label is
first_row = df.iloc[0]
print(first_row)
# print(df.loc[0])     # By index label (if set)


### DataFrame.iat
### Fast integer location scalar accessor.

### DataFrame.loc
### Purely label-location based indexer for selection by label.

### DataFrame.iloc
### Purely integer-location based indexing for selection by position.

### c. Conditional Selection

In [None]:
# Build a boolean mask, then keep only the rows where the mask is True
is_over_30 = df['Age'] > 30
print(df[is_over_30])


# 6. Modifying Data

### a. Adding Columns

In [None]:
# Assigning a list to a new column name adds that column;
# the list must supply one value per row.
salaries = [50000, 60000, 70000]
df['Salary'] = salaries
print(df)


### b. Updating Values

In [None]:
# .at addresses a single cell by (row label, column label)
row_label, column_name = 0, 'Age'
df.at[row_label, column_name] = 26
print(df)


In [None]:
# Set the 'Age' cell of the row labelled 2
df.at[2, 'Age'] = 40

In [None]:
# Bare expression: as the last statement of a notebook cell it renders the DataFrame
df

## c. Deleting Columns/Rows

In [None]:
# Remove the 'Salary' column, then the row labelled 0; both calls modify df in place
df.drop(columns='Salary', inplace=True)
df.drop(index=0, inplace=True)
print(df)


## 7. Handling Missing Data

In [None]:
# Use a dedicated frame for this example so that `df` keeps the 'Name' and
# 'Age' columns that the index and sorting sections below rely on.
missing_df = pd.DataFrame({'A': [1, 2, None], 'B': [4, None, 6]})

# Check for missing values
print(missing_df.isnull())

# Fill missing values. fillna() without inplace=True returns a new frame,
# so missing_df still holds its NaNs for the next step.
filled = missing_df.fillna(0)
print(filled)

# Drop rows with missing values. Run against the unfilled frame; after an
# in-place fill there would be nothing left to drop.
dropped = missing_df.dropna()
print(dropped)


## 8. Working with Index

In [None]:
# Promote the 'Name' column to the row index (in place)
df.set_index(keys='Name', inplace=True)
print(df)

# Move the index back into an ordinary column and restore the default integer index
df.reset_index(drop=False, inplace=True)
print(df)


# 9. Sorting Data

In [None]:
# Sort by column: order the rows by 'Age', largest first (in place)
df.sort_values(by='Age', ascending=False, inplace=True)
print(df)

# Sort by index: put the rows back in ascending order of their labels
df.sort_index(axis=0, inplace=True)
print(df)


# 10. Grouping Data

In [None]:
data = {
    'Category': ['A', 'B', 'A', 'B'],
    'Values': [10, 20, 30, 40],
}
df = pd.DataFrame(data)

# Split the rows by 'Category', then total the 'Values' column within each group
values_by_category = df.groupby('Category')['Values']
grouped = values_by_category.sum()
print(grouped)


# 11. Merging and Joining DataFrames

### a. Concatenation

In [None]:
df1 = pd.DataFrame({'A': [1, 2], 'B': [3, 4]})
df2 = pd.DataFrame({'A': [5, 6], 'B': [7, 8]})

# Stack the frames vertically; each frame keeps its own row labels
frames = [df1, df2]
result = pd.concat(frames)
print(result)


### b. Merging

In [None]:
df1 = pd.DataFrame({'key': ['K0', 'K1'], 'A': ['A0', 'A1']})
df2 = pd.DataFrame({'key': ['K0', 'K1'], 'B': ['B0', 'B1']})

# Combine rows whose 'key' values match (inner join by default)
result = df1.merge(df2, on='key')
print(result)


### c. Joining

# Both frames share the row labels 'K0' and 'K1'
shared_labels = ['K0', 'K1']
df1 = pd.DataFrame({'A': ['A0', 'A1']}, index=shared_labels)
df2 = pd.DataFrame({'B': ['B0', 'B1']}, index=shared_labels)

# join() lines the frames up on their index rather than on a column
result = df1.join(other=df2)
print(result)


# 12. Pivot Tables

data = {
    'Category': ['A', 'B', 'A', 'B'],
    'Type': ['X', 'Y', 'X', 'Y'],
    'Values': [10, 20, 30, 40],
}
df = pd.DataFrame(data)

# One row per 'Category', one column per 'Type'; each cell is the sum of the
# matching 'Values' (combinations with no data come out as NaN).
pivot_table = pd.pivot_table(
    df,
    values='Values',
    index='Category',
    columns='Type',
    aggfunc='sum',
)
print(pivot_table)


# 13. Applying Functions

In [None]:
# Apply a function to every value of one column: Series.apply calls it once
# per element and collects the results into a new Series.
doubled = df['Values'].apply(lambda value: value * 2)
print(doubled)

# Apply a function to each row (axis=1) to combine several columns into one value
labels = df.apply(lambda row: f"{row['Category']}-{row['Type']}", axis=1)
print(labels)



## 14. Working with Dates

In [None]:
# Creating a date range: consecutive daily timestamps starting 2024-01-01,
# one per DataFrame row. Column assignment needs exactly len(df) values,
# so a hard-coded count would raise ValueError on a mismatch.
dates = pd.date_range('2024-01-01', periods=len(df))
print(dates)

# Adding date to DataFrame
df['Date'] = dates
print(df)


## 15. Exporting Data to Other Formats

In [None]:
# Export to Excel; index=False keeps the row index out of the sheet
df.to_excel(excel_writer='output.xlsx', index=False)

# Export to JSON
df.to_json(path_or_buf='output.json')


# 16. Advanced Data Types

## a. Categorical Data

In [None]:
# Store the labels as a categorical column, then show each column's dtype
split_labels = ['test', 'train', 'test', 'train']
df['Category'] = pd.Categorical(split_labels)
print(df.dtypes)


## b. Handling Time Series

In [None]:
# Make sure 'Date' holds datetime values so it can drive time-based resampling
df['Date'] = pd.to_datetime(df['Date'])

# Daily mean, indexed by date. numeric_only=True skips text and categorical
# columns, which cannot be averaged and would otherwise make mean() raise
# TypeError on pandas 2.x.
daily_mean = df.set_index('Date').resample('D').mean(numeric_only=True)
print(daily_mean)
