# 1. PANDAS BASICS

In [None]:
# Pandas is a data manipulation and analysis tool that is built on Numpy.
# Pandas uses a data structure known as DataFrame (think of it as Microsoft excel in Python). 
# DataFrames empower programmers to store and manipulate data in a tabular fashion (rows and columns).
# Series Vs. DataFrame? Series is considered a single column of a DataFrame.

In [1]:
import pandas as pd

In [2]:
# Let's define two lists as shown below:
my_list = ["AAPL", "AMZN", "T"]
my_list

['AAPL', 'AMZN', 'T']

In [3]:
label = ["stock#1", "stock#2", "stock#3"]
label

['stock#1', 'stock#2', 'stock#3']

In [4]:
type(label)

list

In [5]:
type(my_list)

list

In [7]:
# Let's create a one dimensional Pandas "series" 
# Note that series is formed of data and associated labels 
x_series = pd.Series(data=my_list, index=label)

In [8]:
# Let's view the series
x_series

stock#1    AAPL
stock#2    AMZN
stock#3       T
dtype: object

In [9]:
# Let's obtain the datatype
type(x_series)

pandas.core.series.Series

In [12]:
# Let's define a two-dimensional Pandas DataFrame
# Note that you can create a pandas dataframe from a python dictionary
data_dictionary = {
    "Bank_client_id": [111, 222, 333, 444],
    "Bank_client_name": ["Chanel", "Steve", "Mitch", "Ryan"],
    "Bank_worth[$]":[3500, 29000, 10000, 2000],
    "Year_with_bank": [3, 4, 9, 5]
}

bank_client_df = pd.DataFrame(data_dictionary)
bank_client_df

Unnamed: 0,Bank_client_id,Bank_client_name,Bank_worth[$],Year_with_bank
0,111,Chanel,3500,3
1,222,Steve,29000,4
2,333,Mitch,10000,9
3,444,Ryan,2000,5


In [13]:
# Let's obtain the data type 
type(bank_client_df)

pandas.core.frame.DataFrame

In [16]:
# you can only view the first couple of rows using .head()
bank_client_df.head(2)

Unnamed: 0,Bank_client_id,Bank_client_name,Bank_worth[$],Year_with_bank
0,111,Chanel,3500,3
1,222,Steve,29000,4


In [19]:
# you can only view the last couple of rows using .tail()
bank_client_df.tail(1)

Unnamed: 0,Bank_client_id,Bank_client_name,Bank_worth[$],Year_with_bank
3,444,Ryan,2000,5


**MINI CHALLENGE #1:**
- **A porfolio contains a collection of securities such as stocks, bonds and ETFs. Define a dataframe named 'portfolio_df' that holds 3 different stock ticker symbols, number of shares, and price per share (feel free to choose any stocks)**
- **Calculate the total value of the porfolio including all stocks**

In [21]:
# answer 1
data = {
    "stock":["AAPL", "AMZN", "NIKE", "META"],
    "price":[320, 1600, 210, 150],
    "share":[4, 5, 3, 6]
}

profile_df = pd.DataFrame(data)
profile_df

Unnamed: 0,stock,price,share
0,AAPL,320,4
1,AMZN,1600,5
2,NIKE,210,3
3,META,150,6


In [23]:
# total value
profile_df.sum()

stock    AAPLAMZNNIKEMETA
price                2280
share                  18
dtype: object

In [24]:
# answer 2
data = {
    "stock_ticker_symbol":["AAPL", "AMZN", "T"],
    "number_of_share":[3, 4, 9],
    "price_per_share[$]":[3500, 200, 40]
}

profile_df = pd.DataFrame(data)
profile_df

Unnamed: 0,stock_ticker_symbol,number_of_share,price_per_share[$]
0,AAPL,3,3500
1,AMZN,4,200
2,T,9,40


In [27]:
# total value 
stock_dollar_value = profile_df["number_of_share"]*profile_df["price_per_share[$]"]
print(stock_dollar_value)
print(f"Total portfolio value={stock_dollar_value.sum()}")

0    10500
1      800
2      360
dtype: int64
Total portfolio value=11660


# 2. PANDAS WITH CSV AND HTML DATA

In [None]:
# In order to access data on Google Drive, you need to mount the drive to access it's content


In [None]:
# Pandas is used to read a csv file and store data in a DataFrame


In [None]:
# write to a csv file without an index


In [None]:
# write to a csv file with an index


In [None]:
# Read tabular data using read_html


**MINI CHALLENGE #2:**
- **Write a code that uses Pandas to read tabular US retirement data**
- **You can use data from here: https://www.ssa.gov/oact/progdata/nra.html** 

# 3. PANDAS OPERATIONS

In [None]:
# Let's define a dataframe as follows:


In [None]:
# Pick certain rows that satisfy a certain criteria 


In [None]:
# Delete a column from a DataFrame


**MINI CHALLENGE #3:**
- **Using "bank_client_df" DataFrame, leverage pandas operations to only select high networth individuals with minimum $5000** 
- **What is the combined networth for all customers with 5000+ networth?**

# 4. PANDAS WITH FUNCTIONS

In [None]:
# Let's define a dataframe as follows:


In [None]:
# Define a function that increases all clients networth (stocks) by a fixed value of 10% (for simplicity sake) 


In [None]:
# You can apply a function to the DataFrame 


**MINI CHALLENGE #4:**
- **Define a function that doubles stock prices and adds $100**
- **Apply the function to the DataFrame**
- **Calculate the updated total networth of all clients combined**

# 5. SORTING AND ORDERING

In [None]:
# Let's define a dataframe as follows:


In [None]:
# You can sort the values in the dataframe according to number of years with bank


In [None]:
# Note that nothing changed in memory! you have to make sure that inplace is set to True


In [None]:
# Set inplace = True to ensure that change has taken place in memory 


In [None]:
# Note that now the change (ordering) took place 


**MINI CHALLENGE #5:**
- **Sort customers by networth instead of years with bank. Make sure to update values in-memory.**

# 6. CONCATENATING AND MERGING WITH PANDAS

Check this out: https://pandas.pydata.org/pandas-docs/stable/user_guide/merging.html

In [None]:
# Creating a dataframe from a dictionary
# Let's define a dataframe with a list of bank clients with IDs = 1, 2, 3, 4, 5 



In [None]:
# Let's define another dataframe for a separate list of clients (IDs = 6, 7, 8, 9, 10)


In [None]:
# Let's assume we obtained additional information (Annual Salary) about our bank customers 
# Note that data obtained is for all clients with IDs 1 to 10
 

In [None]:
# Let's concatenate both dataframes #1 and #2
# Note that we now have client IDs from 1 to 10


In [None]:
# Let's merge all data on 'Bank Client ID'


**MINI CHALLENGE #6:**
- **Let's assume that you became a new client to the bank**
- **Define a new DataFrame that contains your information such as client ID (choose 11), first name, last name, and annual salary.**
- **Add this new dataframe to the original dataframe "bank_df_all".** 

# EXCELLENT JOB!