In [34]:
import pandas as pd
import numpy as np

In [119]:
# Start with a DataFrame that has no rows and no columns
df = pd.DataFrame()

# Adding the first column by assignment also gives the frame its rows
df["Name"] = [
    "Lauren",
    "Glory",
    "Mark",
]

# Show the frame
df

Unnamed: 0,Name
0,Lauren
1,Glory
2,Mark


In [120]:
# Add an 'Age' column with DataFrame.assign, which returns a new frame;
# rebind df to keep the result.
df = df.assign(Age=[21, 20, 19])

In [121]:
# Alternative to assign(): set the 'Major' column directly with bracket assignment
df["Major"] = [
    "Finance",
    "Economics",
    "MIS",
]

In [122]:
df

Unnamed: 0,Name,Age,Major
0,Lauren,21,Finance
1,Glory,20,Economics
2,Mark,19,MIS


In [123]:
# Add a 'Gender' column, again by direct assignment
df["Gender"] = ["Female", "Female", "Male"]

In [124]:
df

Unnamed: 0,Name,Age,Major,Gender
0,Lauren,21,Finance,Female
1,Glory,20,Economics,Female
2,Mark,19,MIS,Male


### Let's go ahead and add a couple of rows to the dataframe. Let's keep it simple!

In [126]:
# Add two rows by assigning a full list of values to index labels 3 and 4
df.loc[3] = ["Katey", 22, "MIS", "Female"]
df.loc[4] = ["Tapan", 22, "Economics", "Male"]

In [127]:
# For better ways to insert rows into a DataFrame, see:
# https://stackoverflow.com/questions/24284342/insert-a-row-to-pandas-dataframe

In [128]:
df

Unnamed: 0,Name,Age,Major,Gender
0,Lauren,21,Finance,Female
1,Glory,20,Economics,Female
2,Mark,19,MIS,Male
3,Katey,22,MIS,Female
4,Tapan,22,Economics,Male


### Creating a Column Based on Conditions on String Values

In [129]:
# Label a student "IT" when the major is one of the listed IT majors, otherwise "Non-IT"
df['Major_Type'] = np.where(
    df['Major'].isin(['MIS', 'IT']),
    "IT",
    "Non-IT",
)

In [130]:
df

Unnamed: 0,Name,Age,Major,Gender,Major_Type
0,Lauren,21,Finance,Female,Non-IT
1,Glory,20,Economics,Female,Non-IT
2,Mark,19,MIS,Male,IT
3,Katey,22,MIS,Female,IT
4,Tapan,22,Economics,Male,Non-IT


### Creating a Column Based on SQL-like Conditions (Using AND & OR)

In [131]:
# When there are only two possible outcomes, a single np.where is enough.
# Here two comparisons are combined with AND (&): female IT students get 30,
# everyone else gets 25.
df['Tuition_Discount'] = np.where(
    df['Major_Type'].eq('IT') & df['Gender'].eq('Female'),
    30,
    25,
)

In [161]:
# For multiple conditions
def categorize_tuition(row):
    """Return the tuition discount for a single student record.

    Parameters
    ----------
    row : mapping-like
        Anything that supports ``row['Major_Type']`` and ``row['Gender']``,
        e.g. the row that ``df.apply(..., axis=1)`` passes in.

    Returns
    -------
    int
        30 for IT / Female, 27 for IT / Male, 35 for Non-IT / Female,
        and 38 for every other combination.
    """
    major_type = row['Major_Type']
    gender = row['Gender']

    # These are comparisons of individual values, not of whole columns, so the
    # logical `and` is used instead of the element-wise `&` operator.
    if major_type == 'IT' and gender == 'Female':
        return 30
    if major_type == 'IT' and gender == 'Male':
        return 27
    if major_type == 'Non-IT' and gender == 'Female':
        return 35
    # Fallback for any combination not matched above
    return 38

In [193]:
# Run categorize_tuition once per row; axis=1 hands each row to the function.
# Wrapping the call in a lambda is equivalent but unnecessary:
#   df['Tuition_Discount'] = df.apply(lambda row: categorize_tuition(row), axis=1)
df['Tuition_Discount'] = df.apply(categorize_tuition, axis=1)

# Note: apply calls the function separately for every row, so this could get
# really slow on large datasets.

In [194]:
df

Unnamed: 0,Name,Age,Major,Gender,Major_Type,Tuition_Discount,Age_group
0,Lauren,21,Finance,Female,Non-IT,35,Adults
1,Glory,20,Economics,Female,Non-IT,35,Grown Ups
2,Mark,19,MIS,Male,IT,27,Teens
3,Katey,22,MIS,Female,IT,30,Adults
4,Tapan,22,Economics,Male,Non-IT,38,Adults


In [195]:
# Let's nest one np.where inside another to get three buckets:
# under 20 -> 'Teens', over 20 -> 'Adults', everything else -> 'Grown Ups'.
# It is not very readable, but it gets the job done.
df['Age_group'] = np.where(
    df['Age'] < 20,
    'Teens',
    np.where(df['Age'] > 20, 'Adults', 'Grown Ups'),
)

In [196]:
df

Unnamed: 0,Name,Age,Major,Gender,Major_Type,Tuition_Discount,Age_group
0,Lauren,21,Finance,Female,Non-IT,35,Adults
1,Glory,20,Economics,Female,Non-IT,35,Grown Ups
2,Mark,19,MIS,Male,IT,27,Teens
3,Katey,22,MIS,Female,IT,30,Adults
4,Tapan,22,Economics,Male,Non-IT,38,Adults


#### The `key` argument of `sorted` (not to be confused with a dictionary's keys) lets us pass a function that is applied to each item of the iterable (here, our dict object) to produce the value used for sorting.