In [None]:
# Source: https://favtutor.com/blogs/pandas-exercises-python

In [1]:
# E01 - List-to-Series Conversion

# Task: given a plain Python list, build the matching pandas series.

import pandas as pd

# The even numbers from 2 to 10.
lista = list(range(2, 11, 2))

# No index is passed, so the series gets the default 0..n-1 labels.
series = pd.Series(data=lista)
series

0     2
1     4
2     6
3     8
4    10
dtype: int64

In [19]:
# E02 - List-to-Series Conversion with Custom Indexing

# Given a list, output the corresponding pandas series labelled with odd indexes only

import pandas as pd

lista = [2, 4, 6, 8, 10, 12]

# Derive the odd labels (1, 3, 5, ...) from the list length rather than typing them
# by hand, so there is always exactly one label per element even if the list changes.
odd_index = list(range(1, 2 * len(lista), 2))

series = pd.Series(lista, index = odd_index)
series

1      2
3      4
5      6
7      8
9     10
11    12
dtype: int64

In [5]:
# E03 - Date Series Generation

# Task: produce the dates from 1 May 2021 through 12 May 2021, both ends included.

# Beginner's way: spell each date out as a DD-MM-YYYY string.
# The values are plain strings, not datetime objects.

import pandas as pd

# Days 1..12 of May 2021, zero-padded to two digits.
dates = pd.Series([f'{day:02d}-05-2021' for day in range(1, 13)])
dates

0     01-05-2021
1     02-05-2021
2     03-05-2021
3     04-05-2021
4     05-05-2021
5     06-05-2021
6     07-05-2021
7     08-05-2021
8     09-05-2021
9     10-05-2021
10    11-05-2021
11    12-05-2021
dtype: object

In [10]:
# Big boy's way:

# Note that .date_range() doesn't return a Series; it returns a DatetimeIndex.
# Wrap the result as pd.Series(dates) if an actual Series is needed.

# DatetimeIndex is an immutable ndarray-like of datetime64 data.
# It is represented internally as int64, and its elements can be boxed to Timestamp
# objects, which are subclasses of datetime and carry metadata.

# ISO 8601 (YYYY-MM-DD) strings are unambiguous. '05-12-2021' could be read as either
# 12 May or 5 December depending on the day/month convention in use.
dates = pd.date_range(start = '2021-05-01', end = '2021-05-12')

dates

DatetimeIndex(['2021-05-01', '2021-05-02', '2021-05-03', '2021-05-04',
               '2021-05-05', '2021-05-06', '2021-05-07', '2021-05-08',
               '2021-05-09', '2021-05-10', '2021-05-11', '2021-05-12'],
              dtype='datetime64[ns]', freq='D')

In [4]:
# E04 - Implementing a function on each and every element of a series

# Task: apply f(x) = x/2 to every element of a given pandas series.

# Beginner's way: arithmetic on a series works element by element,
# so the whole series can be halved in a single expression.

import pandas as pd

# The even numbers from 2 to 12.
lista = list(range(2, 13, 2))
series04_in = pd.Series(data=lista)

# True division, so the result holds floats.
series04_out = series04_in.div(2)
series04_out

0    1.0
1    2.0
2    3.0
3    4.0
4    5.0
5    6.0
dtype: float64

In [5]:
# Big boy's way: hand the function to .apply(), which calls it on each element.
# series04_in is the series built in the previous cell.


def halve(x):
    """Return x divided by 2."""
    return x / 2


series04_out = series04_in.apply(halve)

series04_out

0    1.0
1    2.0
2    3.0
3    4.0
4    5.0
5    6.0
dtype: float64

In [13]:
# E05 - Dictionary-to-Dataframe Conversion

# Task: turn a dictionary into the matching dataframe and show it.

import pandas as pd

# Each key becomes a column; each value is a one-element list holding that column's only row.
dic = dict(a=['arthur'], b=['bruno'], c=['caio'])

df = pd.DataFrame.from_dict(dic)
df

# If the values were bare strings instead of one-element lists, pandas would raise
#   "If using all scalar values, you must pass an index"

# Scalars alone don't tell pandas how many rows the dataframe should have.
# Wrapping each value in a list supplies the rows, so a default index can be built.

Unnamed: 0,a,b,c
0,arthur,bruno,caio


In [None]:
# E06 - 2D List-to-Dataframe Conversion

# Given a 2D List, convert it into corresponding dataframe and display it

import pandas as pd



In [None]:
# E07 - Reading CSV to Dataframe

# Given a CSV file, read it into a dataframe and display it

import pandas as pd



In [None]:
# E08 - Setting Custom Index in Dataframe

# Given a dataframe, change the index of a dataframe from the default indexes to a particular column

import pandas as pd



In [None]:
# E09 - Sorting a Dataframe by Index

# Given a dataframe (say, with custom indexing), sort it by its index

import pandas as pd



In [None]:
# E10 - Sorting a Dataframe by Multiple Columns

# Given a dataframe, sort it by multiple columns

import pandas as pd



In [None]:
# E11 - DataFrame with Custom Index to DataFrame with Default Index

# Given a dataframe with custom indexing, convert it to default indexing and display it

import pandas as pd



In [None]:
# E12 - Indexing and Selecting Columns in a DataFrame

# Given a dataframe, select a particular column and display it

import pandas as pd



In [None]:
# E13 - Indexing and Selecting Rows in a DataFrame

# Given a dataframe, select first 2 rows and output them

import pandas as pd



In [None]:
# E14 - Conditional Selection of Rows in a DataFrame

# Given a dataframe, select rows based on a condition

import pandas as pd



In [None]:
# E15 - Applying Aggregate Functions

# Given is a dataframe showing name, occupation, salary of people. Find the average salary per occupation

import pandas as pd



In [None]:
# E16 - Filling NaN Values in a DataFrame

# Given a dataframe with NaN Values, fill the NaN values with 0

import pandas as pd



In [None]:
# E17 - Applying Functions (UDFs) on DataFrame

# Given is a dataframe showing Company Names (cname) and corresponding Profits (profit).
# Convert the values of Profit column such that values in it greater than 0 are set to True and the rest are set to False.

import pandas as pd



In [None]:
# E18 - Joining 2 DataFrames by a Common Column (key)

# Given are 2 dataframes, with one dataframe containing Employee ID (eid), Employee Name (ename) and Stipend (stipend)
# and the other dataframe containing Employee ID (eid) and designation of the employee (designation).
# Output the Dataframe containing Employee ID (eid), Employee Name (ename), Stipend (stipend) and Designation (designation).

import pandas as pd



In [None]:
# E19 - Getting the Non-Null Count and Data Type for Every Column

# Given a dataframe, output the non-null count and data-type for every column

import pandas as pd



In [None]:
# E20 - Getting the Statistical Summary of all the Numerical Features of a DataFrame

# Given a dataframe, generate the statistical summary of all the numerical features present in it

import pandas as pd

