# In [79]:
'''
INTRODUCTION TO PANDAS 🐼
Python library for data analysis and manipulation

Two main data structures:
1D - Series (a labeled, list-like sequence of values)
2D - DataFrame (a labeled table of rows and columns, similar to a matrix)

Why?
- Support for CSV, Excel, SQL, JSON etc.
- Powerful data aggregation and grouping
- Easy handling of missing data
- Convenient for filtering, joining, and reshaping
'''

import pandas as pd
import numpy as np

# Column-oriented source data: each key becomes one DataFrame column.
friends = dict(
    name=['Fardin', 'Adnan', 'Shahnawaz'],
    city=['Pune', 'Mumbai', 'Solapur'],
    marks=[98, 99, 100],
)

# Build the DataFrame from the dictionary.
df = pd.DataFrame(data=friends)
df  # bare expression: a notebook renders it only as the last line of a cell

# Write the CSV twice to the same path: first with the index column (the
# default), then without it. The second call overwrites the first, so the
# file left on disk has no index column.
df.to_csv(path_or_buf='friends.csv', index=True)
df.to_csv(path_or_buf='friends.csv', index=False)

# First row, last row, and summary statistics.
df.head(n=1)
df.tail(n=1)
df.describe()

# Read from CSV.
# NOTE(review): assumes 'new.csv' exists in the working directory and has a
# 'city' column and a row labeled 3 — confirm against the data file.
new = pd.read_csv('new.csv')
new  # show

new.describe()
new['city']        # one column, returned as a Series
new['city'][3]     # one cell: the 'city' value of the row labeled 3
new.loc[3, 'city'] = 'Shrinagar'  # overwrite that cell in place
new
# This writes back to the same path that was read, replacing the input file
# with the modified data (without an index column).
new.to_csv('new.csv', index=False)
new

# Custom index: one label per row, starting at 'a' ('a', 'b', 'c', ...).
# The labels are generated from the row count instead of a hard-coded list of
# ten letters, so the assignment no longer raises a length-mismatch ValueError
# when the file does not contain exactly 10 rows. For 10 rows the result is
# the same 'a'..'j' as before; beyond 26 rows the labels continue with the
# characters that follow 'z'.
new.index = [chr(ord('a') + position) for position in range(len(new))]
new

# A Series built from the integers 1 through 10.
sr = pd.Series(list(range(1, 11)))
sr
# Inspect the classes of the Series and of the DataFrame.
type(sr)
type(df)

# Series of 5 random integers from 1 to 10 (the upper bound 11 is exclusive).
sr1 = pd.Series(data=np.random.randint(low=1, high=11, size=5))
sr1

# 5x5 DataFrame of random integers from 1 to 100, default row/column labels.
randomDf = pd.DataFrame(data=np.random.randint(low=1, high=101, size=(5, 5)))
randomDf

# Same shape again, this time passing the row labels 0..4 explicitly.
randomDf = pd.DataFrame(
    data=np.random.randint(low=1, high=101, size=(5, 5)),
    index=np.arange(5),
)
randomDf
randomDf.describe()

# 300x5 random DataFrame with row labels 0..299.
randomDf = pd.DataFrame(
    data=np.random.randint(low=1, high=101, size=(300, 5)),
    index=np.arange(300),
)
randomDf
randomDf.head(n=5)    # first five rows
randomDf.tail(n=5)    # last five rows
randomDf.index        # row labels
randomDf.columns      # column labels
randomDf.to_numpy()   # underlying values as a NumPy array

# Series with string labels 'a', 'b', 'c' instead of the default integers.
s1 = pd.Series({'a': 10, 'b': 20, 'c': 30})
s1
s1['a']  # look up a value by its label

# Add a 'Number' key to the friends dictionary. A DataFrame that was already
# built from the dictionary does not pick up the new key; it has to be
# rebuilt from the dictionary to include it.
friends.update(Number=['1', '2', '3'])
friends

# Build a DataFrame from row records: each inner list is one row, and
# `columns` names the fields.
df1 = pd.DataFrame(
    data=[['Tom', 28], ['Jerry', 31]],
    columns=['Name', 'Age'],
)

# Assigning to a new column label appends that column.
df1['City'] = ['Pune', 'Mumbai']
df1

# drop() returns a new DataFrame without 'Age'; df1 itself is unchanged
# because the result is not assigned.
df1.drop(columns='Age')

# With inplace=True the column is removed from df1 itself.
df1.drop(columns='Age', inplace=True)
df1

# Add another column (the values are strings, not integers).
df1['Number'] = ['1', '2']
df1

# Select a single column by its label.
df1['Name']

# Output:
# 0      Tom
# 1    Jerry
# Name: Name, dtype: object