In [1]:
import os
import pandas as pd

In [2]:
def read_csv(file_path):
    '''
    Loads a CSV file into a Pandas DataFrame, using the file's first column as the index.

    Problems are reported on stdout instead of being raised, so callers
    must check the result for None before using it.

    Parameters:
    -----------
    file_path (str): Location of the CSV file to load.

    Returns:
    -----------
    df (Pandas DataFrame): The parsed contents of the file, or None when the path
    does not exist or the file could not be read as a DataFrame.
    '''

    if os.path.exists(file_path):
        try:
            # index_col=0: the first column of the file becomes the row index
            return pd.read_csv(file_path, index_col=0)
        except Exception as err:
            # best-effort loader: report the failure and signal it with None
            print(f"Error: Failed to read {file_path} as a DataFrame.")
            print(err)
            return None

    # nothing exists at the given path
    print(f"Error: {file_path} does not exist.")
    return None

In [14]:
def drop_nulls(df, col_null_threshold=0.3, row_null_threshold=0.01):
    '''
    Removes sparsely populated columns, then removes the rows that still
    contain too many null values.

    Columns are filtered first; the per-row null fraction is then measured
    against the remaining columns only. With the default row threshold of 1%,
    a frame with at most 100 remaining columns keeps only rows that have no
    null values at all.

    Parameters:
    -----------
    df (Pandas DataFrame): The input DataFrame to be processed. It is not modified.
    col_null_threshold (float): Columns whose fraction of null values is strictly
        greater than this value are dropped. Defaults to 0.3 (30%).
    row_null_threshold (float): Rows are kept only when their fraction of null
        values is strictly less than this value. Defaults to 0.01 (1%).

    Returns:
    -----------
    df (Pandas DataFrame): A new DataFrame with the sparse columns and the
    null-heavy rows removed. The original index labels of the kept rows are preserved.
    '''

    # Fraction of null values in each column (mean of the boolean null mask)
    col_null_fraction = df.isnull().mean()

    # Drop columns whose null fraction exceeds the column threshold
    sparse_columns = col_null_fraction[col_null_fraction > col_null_threshold].index
    df = df.drop(columns=sparse_columns)

    # Boolean mask of the rows to KEEP: those whose null fraction, measured
    # over the remaining columns, is below the row threshold
    rows_to_keep = df.isnull().mean(axis=1) < row_null_threshold
    df = df.loc[rows_to_keep, :]

    return df

In [3]:
# Load almost_there.csv; read_csv uses the file's first column as the index and
# returns None (after printing an error) if the file is missing or unreadable,
# in which case the following cells will fail on `df`.
df = read_csv("almost_there.csv")

In [15]:
# Null cleanup: drop_nulls removes columns that are more than 30% null, then
# keeps only the rows whose null fraction is below 1%. Note that this rebinds
# `df`, so the freshly loaded frame is no longer available afterwards.
df = drop_nulls(df)

In [21]:
# Renumber the rows 0..n-1 now that some rows have been removed; drop=True
# discards the old index instead of inserting it as a new column.
df = df.reset_index(drop=True)

In [22]:
# Preview up to the first 15 rows of the cleaned, re-indexed frame.
df.head(15)

Unnamed: 0,title,summary,year_published,author,review_count,number_of_ratings,length,genre,rating
0,Harry Potter and the Deathly Hallows,"Harry has been burdened with a dark, dangerous...",2007,J.K. Rowling,78723,3439426,759,Fantasy,4.62
1,The Hunger Games,"Could you survive on your own in the wild, wit...",2008,Suzanne Collins,192533,7882293,374,Young Adult,4.33
2,The Kite Runner,1970s Afghanistan: Twelve-year-old Amir is des...,2003,Khaled Hosseini,90080,2931026,371,Fiction,4.33
3,The Book Thief,Librarian's note: An alternate cover edition c...,2006,Markus Zusak,134634,2341042,552,Historical Fiction,4.39
4,Harry Potter and the Half-Blood Prince,"It is the middle of the summer, but there is a...",2005,J.K. Rowling,51340,3021655,652,Fantasy,4.58
5,Harry Potter and the Order of the Phoenix,Harry Potter is about to start his fifth year ...,2003,J.K. Rowling,56241,3119007,912,Fantasy,4.5
6,The Help,Three ordinary women are about to take one ext...,2009,Kathryn Stockett,88236,2582493,464,Fiction,4.47
7,A Thousand Splendid Suns,Mariam is only fifteen when she is sent to Kab...,2007,Khaled Hosseini,69244,1414796,372,Fiction,4.42
8,Life of Pi,Life of Pi is a fantasy adventure novel by Yan...,2001,Yann Martel,51216,1542892,460,Fiction,3.93
9,Catching Fire,Sparks are igniting.Flames are spreading.And t...,2009,Suzanne Collins,108020,3192911,391,Young Adult,4.31
