#### `Import Libraries`

In [2]:
import pandas as pd 
import numpy as np 
from io import StringIO
from IPython.core.display import HTML, display

#### `Required Functions`

In [3]:
def load_data(csv_text=None):
    """Parse comma-separated sample text into a new DataFrame.

    Parameters
    ----------
    csv_text : str, optional
        CSV content to parse. When omitted, the notebook-level
        ``companya_sales_data`` string is used, so existing ``load_data()``
        calls keep working unchanged.

    Returns
    -------
    pandas.DataFrame
        A freshly parsed frame on every call.
    """
    if csv_text is None:
        # Resolved at call time: the cell defining the sample text only has to
        # run before the first call, not before this definition.
        csv_text = companya_sales_data
    return pd.read_csv(StringIO(csv_text))
    
def split_fruit(df):
    """Split the ``Fruit`` column into ``FruitName`` and ``ClassName``.

    Each ``Fruit`` value is split on its first hyphen only, so a class label
    that itself contains a hyphen stays intact instead of raising a
    ``ValueError``. A value with no hyphen gets a missing ``ClassName``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a string ``Fruit`` column such as ``"Apple-Top class"``.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object; the two new columns are added in place.
    """
    # Vectorised split; n=1 limits the result to at most two parts per row.
    parts = df["Fruit"].str.split("-", n=1, expand=True)
    # Guarantee both positions exist even when no row contains a hyphen.
    parts = parts.reindex(columns=[0, 1])
    df["FruitName"] = parts[0]
    df["ClassName"] = parts[1]
    return df

def change_country_codes(df):
    """Overwrite ``Country`` with the short code for each country name.

    The lookup uses ``Series.map`` with a dictionary, so any name that is not
    a key of the table becomes NaN. The frame is modified in place and also
    returned so the function can be used with ``.pipe``.
    """
    name_to_code = {
        "India": "IN",
        "USA": "US",
        "Germany": "GN",
        "Australia": "AU",
        "France": "FN",
        "Italy": "IT",
        "England": "EN",
    }
    country_codes = df["Country"].map(name_to_code)
    df["Country"] = country_codes
    return df

def missing_values_imputation(df, na_map):
    """Fill missing ``Product_Sell`` values from a per-country lookup.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with ``Country`` and ``Product_Sell`` columns.
    na_map : dict
        Maps a ``Country`` value to the number written into ``Product_Sell``
        for rows where it is missing. A country absent from ``na_map`` leaves
        the value missing.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, updated in place.
    """
    target = "Product_Sell"
    is_missing = df[target].isnull()
    # Look up replacements only for the rows that actually need one.
    replacements = df["Country"].loc[is_missing].map(na_map)
    df.loc[is_missing, target] = replacements
    return df

def create_numerical_ranges(df):
    """Bucket ``Revenue_Generation`` into labelled groups in ``Revenue_Groups``.

    Bins are right-closed (the ``pd.cut`` default): (0, 1500] is "Low",
    (1500, 2000] is "Average", (2000, 2800] is "Good" and anything above 2800
    is "Excellent". Values of 0 or below fall outside every bin and get NaN.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a numeric ``Revenue_Generation`` column.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object with the new categorical column added in place.
    """
    # An infinite upper edge keeps the last bin open-ended without relying on
    # `sys`, which this notebook's import cell does not import.
    bins = [0, 1500, 2000, 2800, float("inf")]
    labels = ["Low", "Average", "Good", "Excellent"]
    df["Revenue_Groups"] = pd.cut(df["Revenue_Generation"], bins=bins, labels=labels)
    return df

#### `Load Sample Data`

In [4]:
# Inline CSV sample that load_data() parses. The Italy row carries "nan" in
# Product_Sell; that is the missing value the imputation step fills in later.
# NOTE(review): "Mago" on the England row looks like a typo for "Mango" -
# confirm whether it is intentional dirty data before correcting it.
companya_sales_data = """
Country,Fruit,Product_Sell,Revenue_Generation
India,Apple-Top class,10,1000
Germany,Apple-Top class,15,1400
Australia,Apple-Medium class,18,1800
USA,Apple-Low class,22,2400
Germany,Mango-Top class,26,2600
France,Mango-Top class,30,2800
Italy,Mango-Low class,nan, 2343
England,Mago-Medium class,5,700
India,Apple-Top class,31,2900
"""

In [5]:
# Parse the sample text and show the untouched frame.
df_1 = load_data()
display(HTML(df_1.to_html()))

Unnamed: 0,Country,Fruit,Product_Sell,Revenue_Generation
0,India,Apple-Top class,10.0,1000
1,Germany,Apple-Top class,15.0,1400
2,Australia,Apple-Medium class,18.0,1800
3,USA,Apple-Low class,22.0,2400
4,Germany,Mango-Top class,26.0,2600
5,France,Mango-Top class,30.0,2800
6,Italy,Mango-Low class,,2343
7,England,Mago-Medium class,5.0,700
8,India,Apple-Top class,31.0,2900


#### `Split Fruit into: FruitName & ClassName`

In [6]:
df_1 = (
    load_data()          # load the raw sample data
    .pipe(split_fruit)   # split Fruit into FruitName and ClassName
)
display(HTML(df_1.to_html()))

Unnamed: 0,Country,Fruit,Product_Sell,Revenue_Generation,FruitName,ClassName
0,India,Apple-Top class,10.0,1000,Apple,Top class
1,Germany,Apple-Top class,15.0,1400,Apple,Top class
2,Australia,Apple-Medium class,18.0,1800,Apple,Medium class
3,USA,Apple-Low class,22.0,2400,Apple,Low class
4,Germany,Mango-Top class,26.0,2600,Mango,Top class
5,France,Mango-Top class,30.0,2800,Mango,Top class
6,Italy,Mango-Low class,,2343,Mango,Low class
7,England,Mago-Medium class,5.0,700,Mago,Medium class
8,India,Apple-Top class,31.0,2900,Apple,Top class


#### `Replacing Country Names with Country Codes`

In [7]:
df_1 = (
    load_data()                   # load the raw sample data
    .pipe(split_fruit)            # split Fruit into FruitName and ClassName
    .pipe(change_country_codes)   # replace country names with country codes
)
display(HTML(df_1.to_html()))

Unnamed: 0,Country,Fruit,Product_Sell,Revenue_Generation,FruitName,ClassName
0,IN,Apple-Top class,10.0,1000,Apple,Top class
1,GN,Apple-Top class,15.0,1400,Apple,Top class
2,AU,Apple-Medium class,18.0,1800,Apple,Medium class
3,US,Apple-Low class,22.0,2400,Apple,Low class
4,GN,Mango-Top class,26.0,2600,Mango,Top class
5,FN,Mango-Top class,30.0,2800,Mango,Top class
6,IT,Mango-Low class,,2343,Mango,Low class
7,EN,Mago-Medium class,5.0,700,Mago,Medium class
8,IN,Apple-Top class,31.0,2900,Apple,Top class


#### `Missing Values Imputation`

In [8]:
# Per-country fallback for a missing Product_Sell value: the key is the country
# code produced by change_country_codes(), the value is the number to fill in.
# NOTE(review): there is no entry for "FN" (France), so a missing Product_Sell
# on a French row would stay missing - confirm whether that is intended.
country_na_map = {
    "IN": 15,
    "GN": 35,
    "US": 40,
    "AU": 12,
    "IT": 20,
    "EN": 32
}

In [9]:
df_1 = (
    load_data()                                        # load the raw sample data
    .pipe(split_fruit)                                 # split Fruit into FruitName and ClassName
    .pipe(change_country_codes)                        # replace country names with country codes
    .pipe(missing_values_imputation, country_na_map)   # fill missing Product_Sell per country
)
display(HTML(df_1.to_html()))

Unnamed: 0,Country,Fruit,Product_Sell,Revenue_Generation,FruitName,ClassName
0,IN,Apple-Top class,10.0,1000,Apple,Top class
1,GN,Apple-Top class,15.0,1400,Apple,Top class
2,AU,Apple-Medium class,18.0,1800,Apple,Medium class
3,US,Apple-Low class,22.0,2400,Apple,Low class
4,GN,Mango-Top class,26.0,2600,Mango,Top class
5,FN,Mango-Top class,30.0,2800,Mango,Top class
6,IT,Mango-Low class,20.0,2343,Mango,Low class
7,EN,Mago-Medium class,5.0,700,Mago,Medium class
8,IN,Apple-Top class,31.0,2900,Apple,Top class


#### `Revenue (Numerical) Grouping`

In [10]:
df_1 = (
    load_data()                                        # load the raw sample data
    .pipe(split_fruit)                                 # split Fruit into FruitName and ClassName
    .pipe(change_country_codes)                        # replace country names with country codes
    .pipe(missing_values_imputation, country_na_map)   # fill missing Product_Sell per country
    .pipe(create_numerical_ranges)                     # bucket revenue into labelled groups
)
display(HTML(df_1.to_html()))

Unnamed: 0,Country,Fruit,Product_Sell,Revenue_Generation,FruitName,ClassName,Revenue_Groups
0,IN,Apple-Top class,10.0,1000,Apple,Top class,Low
1,GN,Apple-Top class,15.0,1400,Apple,Top class,Low
2,AU,Apple-Medium class,18.0,1800,Apple,Medium class,Average
3,US,Apple-Low class,22.0,2400,Apple,Low class,Good
4,GN,Mango-Top class,26.0,2600,Mango,Top class,Good
5,FN,Mango-Top class,30.0,2800,Mango,Top class,Good
6,IT,Mango-Low class,20.0,2343,Mango,Low class,Good
7,EN,Mago-Medium class,5.0,700,Mago,Medium class,Low
8,IN,Apple-Top class,31.0,2900,Apple,Top class,Excellent
