In [34]:
import pandas as pd
import numpy as np
import datetime
import json
import os

import altair as alt

df = pd.read_csv('AB_NYC_2019.csv')

def null_count(df):
    #Calculating null values
    # We are dropping  Columns which have more than 30% of null value
    # Replacing null value with mean in case of int and float
    # If null value persist for other cases we are dropping those rows

    nulls_count = {col: df[col].isnull().sum() for col in df.columns}

    is_null_count_out_of_range = {col: df[col].isnull().sum() / df.shape[0] * 100 > 30 for col in df.columns}

    for k, v in is_null_count_out_of_range.items():
        if v:
             df.drop(k, axis=1, inplace=True)
        else:
            if isinstance(df[k][0], (np.int64, np.float64)):
                df[k].fillna(df[k].mean(), inplace=True)
            else:
                drop_list = df[df[k].isnull()].index.tolist()
                df.drop(drop_list, axis=0, inplace=True)

    nulls_count = {col: df[col].isnull().sum() for col in df.columns}

    return df

def unique_value_list(df1):
    #Finding all the unique values to in each column

    uniques = {col: df1[col].unique().tolist() for col in df1.columns}
    return uniques

def new_folder(fname):
    # Directory
    directory = "Altair_Plots/" + fname

    # Parent Directory path
    parent_dir = "../"
    print(parent_dir)
    # Path
    path = os.path.join(parent_dir, directory)
    print(path)

    try:
       os.mkdir(path)
    except OSError as error:
       pass
    return  path

def write_unique_value_list(path, uniques):
    # Writing the above created dictionary to a text file

    with open(path + '/Unique_values.txt', 'w') as json_file:
        json.dump(uniques, json_file)

def dtypes_conversion(uniques):
    #Converting columns to categorical having less than or equal to 10
    #unique values in a column

    for k,v in uniques.items():
         if len(pd.Index(v)) <=10:
            df[k]=df[k].astype('category')

    cat_col=df.select_dtypes(include=['category']).columns.tolist()
    num_col=df.select_dtypes(include=['int64','float64']).columns.tolist()

    types = df.dtypes[df.dtypes == 'object']
    for i, j in types.items():
        try:
            df[i] = pd.to_datetime(df[i])
            df[i + '_year'] = pd.DatetimeIndex(df[i]).year
            df[i + '_month'] = pd.DatetimeIndex(df[i]).month
            cat_col.append(i + '_year')
            cat_col.append(i + '_month')
            df[i + '_month'] = df[i + '_month'].apply( lambda x: datetime.date(1900, x, 1).strftime('%B') )
            print(df[i + '_month'])
        except:
            pass
    date_col = df.select_dtypes(include=['datetime64']).columns.tolist()
    
    return cat_col,num_col,date_col




    

    
def generate_plot_path(cat_name: str, num_name: str, path: str ) -> str:
    """Generate Plot path according to catgorical and numercial column.

    Arguments:
    ----
    cat_name: catgorical column name

    num_name: numercial column name

    chart_name: Specific name of chart.
    """
    file_name = "{} Vs {}_plot.json".format( cat_name, num_name  )
    plot_path = os.path.join(path, file_name)

    if os.path.exists(plot_path):
        print( "{fname} file already existed".format( fname = plot_path ) )
        return ""
    else:
        print( "Generate {fname} folder Successfully".format( fname = file_name )  )
        return plot_path    

def bar_JSON_generator(path,cat_col,num_col):
    # Generating JSON using altair methods

    for i in range(len(cat_col)):
         for j in range(len(num_col)):
            file_path = generate_plot_path( str(cat_col[i]), str(num_col[j]),  path)
            if file_path == "":
                pass
            else:
                chart=alt.Chart(df).mark_bar().encode(
                    x=cat_col[i],
                    y=num_col[j])
                chart.save(file_path)
           #print(cat_col[i],num_col[j])
    print("{} Graph JSON generated in ""altair_plots"" folder for the combinations".format("Simple Bar"))
    

def stackedBar_JSON_generator(path,cat_col):
    # Generating JSON using altair methods

    for i in range(len(cat_col)-1):
        file_path = generate_plot_path( str(cat_col[i]), str(cat_col[i+1]),  path)
        if file_path == "" :
            pass
        else:
            chart=alt.Chart(df).mark_bar(
                cornerRadiusTopLeft=3,
                cornerRadiusTopRight=3
                ).encode(x=cat_col[i],
                         y='count():Q',
                         color=cat_col[i+1])
            chart.save(file_path)
    print("{} Graph JSON generated in ""altair_plots"" folder for the combinations".format("Stacked Bar"))

def groupedBar_JSON_generator(path,cat_col,num_col):
    # Generating JSON using altair methods

    for i in range(len(cat_col)-1):
        for j in range(len(num_col)):
            chart=alt.Chart(df).mark_bar(
                ).encode(x='cat_col[i]:O', y='sum(num_col[i]):Q',
                  color='cat_col[i]:N',
                  column='cat_col[i-1]:N')
            chart.save(path+'/GroupedBar_'+str(cat_col[i])+" Vs "+str(cat_col[i+1])+"_"+str(num_col[i])+"plot.json")
    print("Stacked Bar Graph JSON generated in ""Altair_Plots"" folder for the combinations")

if __name__ == "__main__":
    df1 = null_count(df)

    uniques = unique_value_list(df1)

    
    ### simple bar ###
    path = new_folder("Bar_Graph")

    write_unique_value_list(path, uniques)

    cat_col, num_col, date_col = dtypes_conversion(uniques)

    bar_JSON_generator(path, cat_col, num_col)

    ### stack bar ###
    path = new_folder("stacked_Graph")

    write_unique_value_list(path, uniques)

    cat_col, num_col, date_col = dtypes_conversion(uniques)
    
    stackedBar_JSON_generator(path, cat_col)

    # groupedBar_JSON_generator(path, cat_col, num_col)

../
../Altair_Plots/Bar_Graph
0         October
1             May
3            July
4        November
5            June
           ...   
48782        July
48790        July
48799        July
48805        July
48852        July
Name: last_review_month, Length: 38821, dtype: object
Generate neighbourhood_group Vs id_plot.json folder Successfully
Generate neighbourhood_group Vs host_id_plot.json folder Successfully
Generate neighbourhood_group Vs latitude_plot.json folder Successfully
Generate neighbourhood_group Vs longitude_plot.json folder Successfully
Generate neighbourhood_group Vs price_plot.json folder Successfully
Generate neighbourhood_group Vs minimum_nights_plot.json folder Successfully
Generate neighbourhood_group Vs number_of_reviews_plot.json folder Successfully
Generate neighbourhood_group Vs reviews_per_month_plot.json folder Successfully
Generate neighbourhood_group Vs calculated_host_listings_count_plot.json folder Successfully
Generate neighbourhood_group Vs availabili

In [31]:
import datetime
def month_string_to_number( df , df_name):
    
    df[df_name] = df[df_name].apply( lambda x: datetime.date(1900, x, 1).strftime('%B') )
    return df

0         October
1             May
3            July
4        November
5            June
           ...   
48782        July
48790        July
48799        July
48805        July
48852        July
Name: last_review_month, Length: 38821, dtype: object