In [2]:
import pandas as pd

### HELPERS
def _P(buff, head=None) -> None:
    """Print a value under a separator line, followed by blank spacing.

    A small convenience wrapper so cells that emit several results do not
    have to repeat the same separator/print boilerplate.

    Args:
        buff (Any): The value to print; it is rendered the way ``print`` would.
        head (str, optional): Title shown inside the separator as
            ``---[title]---``. When ``None`` (the default) a plain
            ``--------`` rule is printed instead.
    """
    # Only ``None`` selects the plain rule; any other value (even "") is
    # interpolated into the titled form.
    banner = "--------" if head is None else f"---[{head}]---"

    # One call, three newline-separated items: banner, payload, and a "\n"
    # item which, together with print's own trailing newline, leaves two
    # blank lines after the payload.
    print(banner, buff, "\n", sep="\n")

In [3]:
# Labels for the rows. Defined before the frame so they can be handed to the
# constructor instead of being assigned to `df.index` afterwards.
index_ = [
    "Row_1",
    "Row_2",
    "Row_3",
    "Row_4",
    "Row_5"
]

# Example frame with three columns and the five row labels above.
df = pd.DataFrame(
    {
        "Weight": [45,88,56,15,71],
        "Name": ["Sam","Andrea","Alex","Robin","Kia"],
        "Age": [14,25,55,8,21]
    },
    index=index_,
)

# NOTE: a bare `df.head()` used to sit here. Only the last expression of a
# cell is rendered, so a mid-cell expression is evaluated and thrown away.
# To show the frame, end the cell with `df.head()` or pass it to `_P(...)`.

# df.loc with a single row label selects that row; the columns of the frame
# become the entries of the result.
_P(df.loc["Row_1"])

--------
Weight     45
Name      Sam
Age        14
Name: Row_1, dtype: object




In [4]:
# Row labels Row_1 .. Row_5, generated instead of written out by hand.
index_ = [f"Row_{n}" for n in range(1, 6)]

# Five integer columns, five rows. No index is given here, so the frame
# starts out with the default integer row labels.
df = pd.DataFrame({
    "A": [12, 4, 5, 2, 1],
    "B": [7, 2, 54, 3, 4],
    "C": [20, 16, 11, 3, 8],
    "D": [14, 3, 6, 2, 6],
    "E": [10, 20, 30, 40, 50],
})

# Before relabelling: rows are addressed by position-like integers.
_P(df.head())

# After relabelling: same data, rows now addressed by the string labels.
df.index = index_
_P(df.head())

# One row label, every column -> a single row.
_P(df.loc["Row_1", :])

# Slicing
# Label slices include BOTH endpoints, unlike positional slices.
_P(df.loc["Row_1":"Row_3", :])  # rows Row_1 through Row_3, all columns
_P(df.loc[:, "A":"C"])          # all rows, columns A through C

--------
    A   B   C   D   E
0  12   7  20  14  10
1   4   2  16   3  20
2   5  54  11   6  30
3   2   3   3   2  40
4   1   4   8   6  50


--------
        A   B   C   D   E
Row_1  12   7  20  14  10
Row_2   4   2  16   3  20
Row_3   5  54  11   6  30
Row_4   2   3   3   2  40
Row_5   1   4   8   6  50


--------
A    12
B     7
C    20
D    14
E    10
Name: Row_1, dtype: int64


--------
        A   B   C   D   E
Row_1  12   7  20  14  10
Row_2   4   2  16   3  20
Row_3   5  54  11   6  30


--------
        A   B   C
Row_1  12   7  20
Row_2   4   2  16
Row_3   5  54  11
Row_4   2   3   3
Row_5   1   4   8




In [5]:

# Raw columns for a small car table: one entry per model in each list.
car_data = {
    "Model Name": [
        "Valiant",
        "Duster 360",
        "Merc 240D",
        "Merc 230",
        "Merc 280",
        "Merc 280C",
        "Merc 450SE",
        "Merc 450SL",
        "Cadillac Fleetwood",
        "Lincoln Continental",
        "Chrysler Imperial",
        "Fiat 128",
        "Honda Civic",
        "Toyota Corolla",
    ],
    "Gear": [3,3,4,4,4,4,3,3,3,3,3,4,4,4],
    "Cylinder": [6,8,4,4,6,6,8,8,8,8,8,4,4,4],
}

car_data_df = pd.DataFrame(car_data)
# NOTE: a bare `car_data_df` expression used to follow. It was not the last
# expression of the cell, so it displayed nothing and has been removed.

# Distinct values present in each column.
_P(car_data_df["Gear"].unique(), "Unique values in Gear column")
_P(car_data_df["Cylinder"].unique(), "Unique values in Cylinder column")

# How many rows carry each distinct value.
_P(car_data_df["Gear"].value_counts(), "Value counts for Gear column")
_P(car_data_df["Cylinder"].value_counts(), "Value counts for Cylinder column")

# Number of distinct values (a single integer per column).
_P(car_data_df["Gear"].nunique(), "Number of unique values in Gear column")
_P(car_data_df["Cylinder"].nunique(), "Number of unique values in Cylinder column")

---[Unique values in Gear column]---
[3 4]


---[Unique values in Cylinder column]---
[6 8 4]


---[Value counts for Gear column]---
3    7
4    7
Name: Gear, dtype: int64


---[Value counts for Cylinder column]---
8    6
4    5
6    3
Name: Cylinder, dtype: int64


---[Number of unique values in Gear column]---
2


---[Number of unique values in Cylinder column]---
3




In [6]:
import numpy as np

# Three columns, each with one missing entry (np.nan) in a different row.
# Deliberately NOT named `dict`: binding that name at cell level would shadow
# the built-in `dict` type for every cell run afterwards in the same kernel.
data_with_nan = {
    "FS": [100,90,np.nan,95],
    "SS": [30,45,56,np.nan],
    "TS": [np.nan,40,80,98]
}

df = pd.DataFrame(data_with_nan)

# NOTE: a bare `df.head()` used to sit here; as a mid-cell expression it was
# evaluated and discarded without displaying anything, so it was removed.

# isnull() gives a frame of the same shape holding True where a value is
# missing and False elsewhere.
_P(df.isnull(), "Is null")

---[Is null]---
      FS     SS     TS
0  False  False   True
1  False  False  False
2   True  False  False
3  False   True  False




In [26]:
ds1 = pd.read_csv("ds1.csv")

# Boolean mask with one entry per row: True where "Gender" is missing.
bool_s = pd.isnull(ds1["Gender"])
_P(bool_s, "Is null")

# NOTE: `ds1[bool_s]` (the rows whose Gender is missing) and `ds1.columns`
# used to appear here as bare expressions. Neither was the last expression of
# the cell, so neither was displayed. To inspect the filtered rows, pass them
# to a display call, e.g. `_P(ds1[bool_s])`.

# Count of missing values per column. Kept as the final expression so the
# notebook renders it as the cell's result.
ds1.isnull().sum()

---[Is null]---
0     False
1      True
2     False
3     False
4     False
      ...  
95    False
96    False
97    False
98    False
99    False
Name: Gender, Length: 100, dtype: bool




Emp ID                  0
Name Prefix             0
First Name              0
Middle Initial          0
Last Name               0
Gender                  1
E Mail                  0
Father's Name           0
Mother's Name           0
Mother's Maiden Name    0
Salary                  0
dtype: int64

In [36]:
emp = pd.read_csv("employees.csv")

# Per-column counts of missing and of present values in the raw data.
_P(emp.isnull().sum(), "Is null")
_P(emp.notnull().sum(), "Is not null")

# Drop rows with null values. dropna() is called once and its result reused
# below, instead of recomputing it for each statement. `emp` is not reassigned.
emp_no_nulls = emp.dropna()

# Sanity check: every column should now report zero missing values.
_P(emp_no_nulls.isnull().sum(), "Is null")

# Final expression of the cell, so the notebook renders it as the result.
emp_no_nulls.head()


---[Is null]---
First Name            67
Gender               145
Start Date             0
Last Login Time        0
Salary                 0
Bonus %                0
Senior Management     67
Team                  43
dtype: int64


---[Is not null]---
First Name            933
Gender                855
Start Date           1000
Last Login Time      1000
Salary               1000
Bonus %              1000
Senior Management     933
Team                  957
dtype: int64


---[Is null]---
First Name           0
Gender               0
Start Date           0
Last Login Time      0
Salary               0
Bonus %              0
Senior Management    0
Team                 0
dtype: int64




Unnamed: 0,First Name,Gender,Start Date,Last Login Time,Salary,Bonus %,Senior Management,Team
0,Douglas,Male,8/6/1993,12:42 PM,97308,6.945,True,Marketing
2,Maria,Female,4/23/1993,11:17 AM,130590,11.858,False,Finance
3,Jerry,Male,3/4/2005,1:00 PM,138705,9.34,True,Finance
4,Larry,Male,1/24/1998,4:47 PM,101004,1.389,True,Client Services
5,Dennis,Male,4/18/1987,1:35 AM,115163,10.125,False,Legal
