In [1]:
import pandas as pd
import numpy as np

In [2]:
# creating pandas dataframe
original_dataframe=pd.read_csv("IMDB-Movie-Data.csv")

In [3]:
# converting all column names into lower case
original_dataframe.columns=[col.lower() for col in original_dataframe]

In [4]:
# observe data
original_dataframe.head()

Unnamed: 0,rank,title,genre,description,director,actors,year,runtime (minutes),rating,votes,revenue (millions),metascore
0,1,Guardians of the Galaxy,"Action,Adventure,Sci-Fi",A group of intergalactic criminals are forced ...,James Gunn,"Chris Pratt, Vin Diesel, Bradley Cooper, Zoe S...",2014,121,8.1,757074,333.13,76.0
1,2,Prometheus,"Adventure,Mystery,Sci-Fi","Following clues to the origin of mankind, a te...",Ridley Scott,"Noomi Rapace, Logan Marshall-Green, Michael Fa...",2012,124,7.0,485820,126.46,65.0
2,3,Split,"Horror,Thriller",Three girls are kidnapped by a man with a diag...,M. Night Shyamalan,"James McAvoy, Anya Taylor-Joy, Haley Lu Richar...",2016,117,7.3,157606,138.12,62.0
3,4,Sing,"Animation,Comedy,Family","In a city of humanoid animals, a hustling thea...",Christophe Lourdelet,"Matthew McConaughey,Reese Witherspoon, Seth Ma...",2016,108,7.2,60545,270.32,59.0
4,5,Suicide Squad,"Action,Adventure,Fantasy",A secret government agency recruits some of th...,David Ayer,"Will Smith, Jared Leto, Margot Robbie, Viola D...",2016,123,6.2,393727,325.02,40.0


In [5]:
# observe the data types of attributes
original_dataframe.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 12 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   rank                1000 non-null   int64  
 1   title               1000 non-null   object 
 2   genre               1000 non-null   object 
 3   description         1000 non-null   object 
 4   director            1000 non-null   object 
 5   actors              1000 non-null   object 
 6   year                1000 non-null   int64  
 7   runtime (minutes)   1000 non-null   int64  
 8   rating              1000 non-null   float64
 9   votes               1000 non-null   int64  
 10  revenue (millions)  872 non-null    float64
 11  metascore           936 non-null    float64
dtypes: float64(3), int64(4), object(5)
memory usage: 93.9+ KB


In [6]:
def rating_function(rate):
    """
    Converts ratings into categorical data
    
    Parameters:
                rate (float)
    Returns:
            string
    
    """
    if rate >= 7 and rate <=10:
        return "better"
    elif rate < 7 and rate >= 5:
        return "good"
    elif rate < 5 and rate >=3:
        return "bad"
    elif rate < 3 and rate >=0: 
        return "worst"

In [7]:
# map,apply,applymap
original_dataframe["rating_category"]=original_dataframe["rating"].apply(rating_function)

In [8]:
original_dataframe.head()

Unnamed: 0,rank,title,genre,description,director,actors,year,runtime (minutes),rating,votes,revenue (millions),metascore,rating_category
0,1,Guardians of the Galaxy,"Action,Adventure,Sci-Fi",A group of intergalactic criminals are forced ...,James Gunn,"Chris Pratt, Vin Diesel, Bradley Cooper, Zoe S...",2014,121,8.1,757074,333.13,76.0,better
1,2,Prometheus,"Adventure,Mystery,Sci-Fi","Following clues to the origin of mankind, a te...",Ridley Scott,"Noomi Rapace, Logan Marshall-Green, Michael Fa...",2012,124,7.0,485820,126.46,65.0,better
2,3,Split,"Horror,Thriller",Three girls are kidnapped by a man with a diag...,M. Night Shyamalan,"James McAvoy, Anya Taylor-Joy, Haley Lu Richar...",2016,117,7.3,157606,138.12,62.0,better
3,4,Sing,"Animation,Comedy,Family","In a city of humanoid animals, a hustling thea...",Christophe Lourdelet,"Matthew McConaughey,Reese Witherspoon, Seth Ma...",2016,108,7.2,60545,270.32,59.0,better
4,5,Suicide Squad,"Action,Adventure,Fantasy",A secret government agency recruits some of th...,David Ayer,"Will Smith, Jared Leto, Margot Robbie, Viola D...",2016,123,6.2,393727,325.02,40.0,good


In [9]:
# getting counts of category
original_dataframe["rating_category"].value_counts()

good      512
better    445
bad        40
worst       3
Name: rating_category, dtype: int64

In [10]:
#original_dataframe["rating"].max()

In [11]:
# as we converted rating data into categorical data
# so we need to delete original rating column in dataframe which
# contains float values
modified_dataframe=original_dataframe.drop(["rating"],axis=1)

In [12]:
# now we can see our original dataframe got modified 
# and there is only one column for rating 
# which contains categoriacl data
modified_dataframe.head()

Unnamed: 0,rank,title,genre,description,director,actors,year,runtime (minutes),votes,revenue (millions),metascore,rating_category
0,1,Guardians of the Galaxy,"Action,Adventure,Sci-Fi",A group of intergalactic criminals are forced ...,James Gunn,"Chris Pratt, Vin Diesel, Bradley Cooper, Zoe S...",2014,121,757074,333.13,76.0,better
1,2,Prometheus,"Adventure,Mystery,Sci-Fi","Following clues to the origin of mankind, a te...",Ridley Scott,"Noomi Rapace, Logan Marshall-Green, Michael Fa...",2012,124,485820,126.46,65.0,better
2,3,Split,"Horror,Thriller",Three girls are kidnapped by a man with a diag...,M. Night Shyamalan,"James McAvoy, Anya Taylor-Joy, Haley Lu Richar...",2016,117,157606,138.12,62.0,better
3,4,Sing,"Animation,Comedy,Family","In a city of humanoid animals, a hustling thea...",Christophe Lourdelet,"Matthew McConaughey,Reese Witherspoon, Seth Ma...",2016,108,60545,270.32,59.0,better
4,5,Suicide Squad,"Action,Adventure,Fantasy",A secret government agency recruits some of th...,David Ayer,"Will Smith, Jared Leto, Margot Robbie, Viola D...",2016,123,393727,325.02,40.0,good


In [13]:
# our dictionary with category and its corresponding integer value
data_dict={"better":4,"good":3,"bad":2,"worst":1}

In [14]:
# dictionary is
data_dict

{'better': 4, 'good': 3, 'bad': 2, 'worst': 1}

In [15]:
# replacing categorical values with its corresponding integer value
for i in modified_dataframe.index:
    modified_dataframe["rating_category"][i]=data_dict[modified_dataframe["rating_category"][i]]

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until


In [16]:
# observe modified data
# rating_category contains integer values
modified_dataframe.head()

Unnamed: 0,rank,title,genre,description,director,actors,year,runtime (minutes),votes,revenue (millions),metascore,rating_category
0,1,Guardians of the Galaxy,"Action,Adventure,Sci-Fi",A group of intergalactic criminals are forced ...,James Gunn,"Chris Pratt, Vin Diesel, Bradley Cooper, Zoe S...",2014,121,757074,333.13,76.0,4
1,2,Prometheus,"Adventure,Mystery,Sci-Fi","Following clues to the origin of mankind, a te...",Ridley Scott,"Noomi Rapace, Logan Marshall-Green, Michael Fa...",2012,124,485820,126.46,65.0,4
2,3,Split,"Horror,Thriller",Three girls are kidnapped by a man with a diag...,M. Night Shyamalan,"James McAvoy, Anya Taylor-Joy, Haley Lu Richar...",2016,117,157606,138.12,62.0,4
3,4,Sing,"Animation,Comedy,Family","In a city of humanoid animals, a hustling thea...",Christophe Lourdelet,"Matthew McConaughey,Reese Witherspoon, Seth Ma...",2016,108,60545,270.32,59.0,4
4,5,Suicide Squad,"Action,Adventure,Fantasy",A secret government agency recruits some of th...,David Ayer,"Will Smith, Jared Leto, Margot Robbie, Viola D...",2016,123,393727,325.02,40.0,3
