In [1]:
import pandas as pd

In [2]:
# Column values, one entry per country; every list uses the same country order.
names = [
    'United States',
    'Australia',
    'Japan',
    'India',
    'Russia',
    'Morocco',
    'Egypt',
]
dr = [True, False, False, False, True, True, True]   # values for 'drives_right'
cpc = [809, 731, 588, 18, 200, 70, 45]               # values for 'cars_per_cap'

# Short codes used as the row labels of the DataFrame.
rows_index = ['US', 'AUS', 'JP', 'IN', 'RUS', 'MOR', 'EGY']

# Map each column name to its list of values, then build the frame
# with the short codes as its index.
my_dict = dict(country=names, drives_right=dr, cars_per_cap=cpc)
cars = pd.DataFrame(data=my_dict, index=rows_index)
cars

Unnamed: 0,country,drives_right,cars_per_cap
US,United States,True,809
AUS,Australia,False,731
JP,Japan,False,588
IN,India,False,18
RUS,Russia,True,200
MOR,Morocco,True,70
EGY,Egypt,True,45


In [3]:
# (number of rows, number of columns) of the DataFrame
cars.shape

(7, 3)

In [4]:
# Preview the top of the frame; called without an argument it shows the first 5 rows.
cars.head()

Unnamed: 0,country,drives_right,cars_per_cap
US,United States,True,809
AUS,Australia,False,731
JP,Japan,False,588
IN,India,False,18
RUS,Russia,True,200


In [5]:
# Print a summary of the frame: the index range, and for each column its
# non-null count and dtype, followed by the memory usage.
cars.info()

<class 'pandas.core.frame.DataFrame'>
Index: 7 entries, US to EGY
Data columns (total 3 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   country       7 non-null      object
 1   drives_right  7 non-null      bool  
 2   cars_per_cap  7 non-null      int64 
dtypes: bool(1), int64(1), object(1)
memory usage: 175.0+ bytes


In [6]:
# Summary statistics (count, mean, std, min, quartiles, max).
# Only cars_per_cap appears in the result; the object and bool columns
# are not summarised by this call.
cars.describe()

Unnamed: 0,cars_per_cap
count,7.0
mean,351.571429
std,345.595552
min,18.0
25%,57.5
50%,200.0
75%,659.5
max,809.0


In [7]:
# The frame's data as a 2-D NumPy array (row and column labels are dropped).
# to_numpy() is the accessor pandas recommends over the older `.values`
# attribute; the columns hold str, bool and int data, so the array has
# dtype=object.
cars.to_numpy()

array([['United States', True, 809],
       ['Australia', False, 731],
       ['Japan', False, 588],
       ['India', False, 18],
       ['Russia', True, 200],
       ['Morocco', True, 70],
       ['Egypt', True, 45]], dtype=object)

In [8]:
# Column labels of the DataFrame, returned as a pandas Index
cars.columns

Index(['country', 'drives_right', 'cars_per_cap'], dtype='object')

In [9]:
# Row labels of the DataFrame, returned as a pandas Index
cars.index

Index(['US', 'AUS', 'JP', 'IN', 'RUS', 'MOR', 'EGY'], dtype='object')

In [10]:
# Order the rows from the fewest to the most cars per capita.
# sort_values returns a new frame; `cars` itself is left unchanged.
cars.sort_values(by="cars_per_cap")

Unnamed: 0,country,drives_right,cars_per_cap
IN,India,False,18
EGY,Egypt,True,45
MOR,Morocco,True,70
RUS,Russia,True,200
JP,Japan,False,588
AUS,Australia,False,731
US,United States,True,809


In [11]:
# Order the rows from the most to the fewest cars per capita.
cars.sort_values(
    by="cars_per_cap",
    ascending=False,
)

Unnamed: 0,country,drives_right,cars_per_cap
US,United States,True,809
AUS,Australia,False,731
JP,Japan,False,588
RUS,Russia,True,200
MOR,Morocco,True,70
EGY,Egypt,True,45
IN,India,False,18


In [12]:
# Sort on two keys: cars_per_cap descending first, then country ascending
# to break ties. Each entry in `ascending` pairs with the key at the same
# position in `by`. Every cars_per_cap value here is distinct, so the
# country tie-breaker does not change the order.
cars.sort_values(
    by=["cars_per_cap", "country"],
    ascending=[False, True],
)

Unnamed: 0,country,drives_right,cars_per_cap
US,United States,True,809
AUS,Australia,False,731
JP,Japan,False,588
RUS,Russia,True,200
MOR,Morocco,True,70
EGY,Egypt,True,45
IN,India,False,18


In [13]:
def ptc30(column):
    """Return the 30th percentile of ``column``.

    Parameters
    ----------
    column : object exposing ``quantile`` (e.g. a pandas Series)
        The values to summarise.

    Returns
    -------
    Whatever ``column.quantile(0.3)`` returns.
    """
    thirtieth_percentile = column.quantile(q=0.3)
    return thirtieth_percentile
    
# Apply the custom aggregation to one column. Passing the function inside a
# list makes agg return a Series labelled with the function's name.
cars["cars_per_cap"].agg([ptc30])


ptc30    65.0
Name: cars_per_cap, dtype: float64

In [14]:
# Running (cumulative) total of cars_per_cap, accumulated in row order;
# the final entry equals the sum of the whole column.
cars["cars_per_cap"].cumsum()

US      809
AUS    1540
JP     2128
IN     2146
RUS    2346
MOR    2416
EGY    2461
Name: cars_per_cap, dtype: int64

In [15]:
# Total of the column via agg. The aggregation is named by the string "sum"
# rather than by passing Python's builtin `sum`: passing the builtin is
# deprecated in recent pandas releases and emits a FutureWarning. The result
# and its "sum" label are the same.
cars["cars_per_cap"].agg(["sum"])

sum    2461
Name: cars_per_cap, dtype: int64

In [16]:
# Keep one row per distinct country. No country is repeated in this
# DataFrame, so the result contains the same rows as `cars`.
# To treat rows as duplicates only when several columns match, list them all:
#     cars.drop_duplicates(subset=["country", "cars_per_cap"])
unique_country = cars.drop_duplicates(subset=["country"])
unique_country

Unnamed: 0,country,drives_right,cars_per_cap
US,United States,True,809
AUS,Australia,False,731
JP,Japan,False,588
IN,India,False,18
RUS,Russia,True,200
MOR,Morocco,True,70
EGY,Egypt,True,45


In [17]:
# Count how many times each country occurs. Every count is 1 here because
# unique_country holds exactly one row per country.
unique_country["country"].value_counts(sort=True)

country
United States    1
Australia        1
Japan            1
India            1
Russia           1
Morocco          1
Egypt            1
Name: count, dtype: int64

In [18]:
# normalize=True reports each value's share of the rows instead of a raw
# count. The 7 values are all distinct, so each share is 1/7 (about 0.142857).
cars["cars_per_cap"].value_counts(normalize=True)

cars_per_cap
809    0.142857
731    0.142857
588    0.142857
18     0.142857
200    0.142857
70     0.142857
45     0.142857
Name: proportion, dtype: float64