### 1. Data exploration: here we assign the data and then display it in a tabular format.

In [1]:
import pandas as pd
 
# Assign data: one record per student, in column order.
# Missing marks are recorded with the placeholder string 'NaN'.
columns = ['Name', 'Age', 'Gender', 'Marks']
records = [
    ('Jai', 17, 'M', 90),
    ('Princi', 17, 'F', 76),
    ('Gaurav', 18, 'M', 'NaN'),
    ('Anuj', 17, 'M', 74),
    ('Ravi', 18, 'M', 65),
    ('Natasha', 17, 'F', 'NaN'),
    ('Riya', 17, 'F', 71),
]

# Transpose the records into a column-name -> list-of-values mapping
data = {column: list(values)
        for column, values in zip(columns, zip(*records))}

# Convert into DataFrame
df = pd.DataFrame(data)

# Display data
df

Unnamed: 0,Name,Age,Gender,Marks
0,Jai,17,M,90.0
1,Princi,17,F,76.0
2,Gaurav,18,M,
3,Anuj,17,M,74.0
4,Ravi,18,M,65.0
5,Natasha,17,F,
6,Riya,17,F,71.0


### 2. Dealing with missing values: as the previous output shows, the Marks column contains NaN values, which we handle by replacing them with the column mean.

In [2]:
# Compute average of the valid marks.
# pd.to_numeric(errors='coerce') turns the 'NaN' placeholders (and any other
# non-numeric entry) into real missing values, so integer and decimal marks
# are both counted; Series.mean() then skips the missing entries. If no mark
# is valid the mean is NaN rather than a ZeroDivisionError.
marks = pd.to_numeric(df['Marks'], errors='coerce')
avg = marks.mean()

# Replace missing values, touching only the Marks column
df = df.assign(Marks=marks.fillna(avg))

# Display data
df

Unnamed: 0,Name,Age,Gender,Marks
0,Jai,17,M,90.0
1,Princi,17,F,76.0
2,Gaurav,18,M,75.2
3,Anuj,17,M,74.0
4,Ravi,18,M,65.0
5,Natasha,17,F,75.2
6,Riya,17,F,71.0


### 3. Reshaping data: in the Gender column, we can reshape the data by encoding each category as a number.

In [3]:
# Categorize gender: 'M' is encoded as 0 and 'F' as 1, stored as floats
gender_codes = {'M': 0, 'F': 1}
encoded_gender = df['Gender'].map(gender_codes)
df['Gender'] = encoded_gender.astype(float)

# Display data
df

Unnamed: 0,Name,Age,Gender,Marks
0,Jai,17,0.0,90.0
1,Princi,17,1.0,76.0
2,Gaurav,18,0.0,75.2
3,Anuj,17,0.0,74.0
4,Ravi,18,0.0,65.0
5,Natasha,17,1.0,75.2
6,Riya,17,1.0,71.0


### 4. Filtering data: suppose we need only the name, gender, and marks of the top-scoring students. Here we remove the unwanted data.

In [4]:
# Keep only the top-scoring students (marks of 75 or more)
is_top_scorer = df['Marks'] >= 75
df = df.loc[is_top_scorer]

# Remove the Age column, which is not needed in the result
df = df.drop(columns=['Age'])

# Display data
df

Unnamed: 0,Name,Gender,Marks
0,Jai,0.0,90.0
1,Princi,1.0,76.0
2,Gaurav,0.0,75.2
5,Natasha,1.0,75.2


### Wrangling Data Using the Merge Operation: `pd.merge(data_frame1, data_frame2, on="field")`

In [5]:
# import module
import pandas as pd   
 
# Student details: one row per student, identified by ID
student_ids = list(range(101, 111))
student_names = ['Jagroop', 'Praveen', 'Harjot', 'Pooja', 'Rahul',
                 'Nikita', 'Saurabh', 'Ayush', 'Dolly', "Mohit"]

# Every student in this sample belongs to the same branch
details = pd.DataFrame({
    'ID': student_ids,
    'NAME': student_names,
    'BRANCH': ['CSE'] * len(student_ids),
})

# Printing details
print(details)

    ID     NAME BRANCH
0  101  Jagroop    CSE
1  102  Praveen    CSE
2  103   Harjot    CSE
3  104    Pooja    CSE
4  105    Rahul    CSE
5  106   Nikita    CSE
6  107  Saurabh    CSE
7  108    Ayush    CSE
8  109    Dolly    CSE
9  110    Mohit    CSE


### SECOND TYPE OF DATA

In [6]:
# Import module
import pandas as pd
 
# Fee status per student ID; amounts are kept as strings, with 'NIL'
# marking students who have nothing pending
pending_fees = ['5000', '250', 'NIL', '9000', '15000',
                'NIL', '4500', '1800', '250', 'NIL']

fees_status = pd.DataFrame({
    'ID': list(range(101, 111)),
    'PENDING': pending_fees,
})

# Printing fees_status
print(fees_status)

    ID PENDING
0  101    5000
1  102     250
2  103     NIL
3  104    9000
4  105   15000
5  106     NIL
6  107    4500
7  108    1800
8  109     250
9  110     NIL


### WRANGLING DATA USING MERGE OPERATION:

In [7]:
# Import module
import pandas as pd
 
# Both frames are rebuilt here so that this cell runs on its own.
shared_ids = list(range(101, 111))

# Student details, keyed by ID
details = pd.DataFrame({
    'ID': shared_ids,
    'NAME': ['Jagroop', 'Praveen', 'Harjot', 'Pooja', 'Rahul',
             'Nikita', 'Saurabh', 'Ayush', 'Dolly', "Mohit"],
    'BRANCH': ['CSE'] * len(shared_ids),
})

# Pending fees, keyed by the same IDs
fees_status = pd.DataFrame({
    'ID': list(shared_ids),
    'PENDING': ['5000', '250', 'NIL', '9000', '15000',
                'NIL', '4500', '1800', '250', 'NIL'],
})

# Merging Dataframes: join the two frames on their common 'ID' column
merged = details.merge(fees_status, on='ID')
print(merged)

    ID     NAME BRANCH PENDING
0  101  Jagroop    CSE    5000
1  102  Praveen    CSE     250
2  103   Harjot    CSE     NIL
3  104    Pooja    CSE    9000
4  105    Rahul    CSE   15000
5  106   Nikita    CSE     NIL
6  107  Saurabh    CSE    4500
7  108    Ayush    CSE    1800
8  109    Dolly    CSE     250
9  110    Mohit    CSE     NIL
