In [40]:
import pandas as pd
from pandas import DataFrame
import os

In [None]:
# filepath = './data/netflix_titles.csv'

# df = pd.read_csv(filepath, header=0, sep=',').replace('\n','', regex=True)

def csv_to_dataframe(filepath: str, index: str) -> DataFrame:
    """
    Load a csv file into a pandas dataframe and return it.

     - filepath: path (or URL) of the csv file to read.

     - index: name of the csv column to use as the dataframe index.

    The first line of the file is treated as the header row.
    Returns the loaded dataframe, indexed by the 'index' column.
    """
    # index_col is given as a one-element list, exactly as before, so the
    # resulting index carries the column's name.
    loaded = pd.read_csv(filepath, header=0, index_col=[index])
    return loaded

In [41]:
# Dataframe Details
# Sample dataframe used by this cell: pandas downloads the csv from the URL
# below when the cell runs, so network access is required.
df = pd.read_csv("https://media.geeksforgeeks.org/wp-content/uploads/nba.csv")
def details(df: DataFrame):
    """
    Print a short summary of a dataframe: its column names, its shape
    (rows and columns) and the dtype of every column.

     - df: dataframe to describe.

    Nothing is returned; the summary is written to stdout.
    """
    column_names = list(df.columns)
    row_count, column_count = df.shape[0], df.shape[1]
    column_types = list(df.dtypes)
    print(f"""
    Columns: {column_names}
    Shape: {row_count} Rows, {column_count} Columns
    Data Types: {column_types}
    """)



In [None]:
def index_setter(df: DataFrame, index) -> None:
    """
    Make an existing column the index of a dataframe.

    The dataframe is modified in place; nothing is returned.
        - df: dataframe whose index is replaced
        - index: name of the existing column to use as the new index
    """
    df.set_index(keys=index, inplace=True)
    return None

In [None]:


def remove_new_line_char(df: DataFrame) -> DataFrame:
    """
    Return a copy of the dataframe with newline characters stripped
    from its values.

    The input dataframe is left untouched; the cleaned copy is returned.
    """
    cleaned = df.replace(to_replace='\n', value='', regex=True)
    return cleaned

In [None]:
def nullfill(df: DataFrame, fill_val) -> None:
    """
    Replace the missing (NaN/None) fields of a dataframe with fill_val.

    The dataframe is modified in place; nothing is returned.
     - df: dataframe to fill
     - fill_val: value written into every missing field. A scalar fills
       every column; a dict or Series keyed by column name fills each
       listed column with its own value.
    """
    # No axis argument: a scalar fill is the same along either axis, while
    # axis=1 makes pandas reject dict/Series fill values and routes scalar
    # fills through a double transpose that turns mixed-dtype frames into
    # object columns.
    df.fillna(value=fill_val, inplace=True)

In [None]:
def duplicate_drop(df: DataFrame, col_list=None) -> None:
    """
    Remove duplicate rows from a dataframe, keeping the first occurrence.

    The dataframe is modified in place; nothing is returned.
     - df: dataframe to de-duplicate
     - col_list: column name, or list of column names, compared when
       deciding whether two rows are duplicates. When left as None all
       columns are compared.
     - eg: duplicate_drop(dataframe, ['col1', 'col2'])
    """
    df.drop_duplicates(subset=col_list, keep='first', inplace=True)
    return None

In [None]:
def drop_missing_row(df: DataFrame, col_list=None) -> None:
    """
    Remove every row that has a missing value in any of the given columns.

    The dataframe is modified in place; nothing is returned.
     - df: dataframe to clean
     - col_list: column name, or list of column names, checked for
       missing values. When left as None all columns are checked.
     - eg: drop_missing_row(dataframe, ['col1', 'col2'])
    """
    df.dropna(axis=0, how='any', subset=col_list, inplace=True)
    return None

In [None]:
def drop_low_data(df: DataFrame, threshold=2) -> None:
    """
    Remove rows that hold too little data.

    A row is kept only if it has at least <threshold> non-missing values;
    rows with fewer are dropped. Default threshold is 2.
    The dataframe is modified in place; nothing is returned.
    """
    df.dropna(axis=0, thresh=threshold, inplace=True)
    return None

In [42]:
def drop_extra_columns(df: DataFrame, num_cols) -> None:
    """
    Keep only the first <num_cols> columns of a dataframe and drop the rest.

    The dataframe is modified in place; nothing is returned. A dataframe
    that already has <num_cols> columns or fewer is left unchanged.
     - df: dataframe to trim
     - num_cols: number of leading columns to keep (0 or more)

    Raises ValueError if num_cols is negative.
    """
    if num_cols < 0:
        # A negative slice start would count from the end and silently drop
        # the wrong columns.
        raise ValueError("num_cols must be zero or a positive integer")
    surplus = df.columns[num_cols:]
    # Without `columns=` pandas looks the labels up in the row index
    # (axis=0); inplace=True makes the change visible to the caller.
    df.drop(columns=surplus, inplace=True)

In [45]:

def write_csv(df: DataFrame, filepath, filename):
    """
    Write a dataframe to a csv file named <filename> inside <filepath>.

     - df: dataframe to write
     - filepath: directory that will contain the file; it is created
       (including parents) if it does not exist yet. An empty string
       means the current working directory.
     - filename: name of the csv file, e.g. 'output.csv'

    Nothing is returned.
    """
    if filepath:
        # makedirs('') raises, so only create a directory when one was given.
        os.makedirs(filepath, exist_ok=True)
    # Join with a path separator so the file is written inside the directory
    # even when filepath has no trailing slash.
    df.to_csv(os.path.join(filepath, filename))