## Matplotlib

Cheat Sheet: https://datacamp-community-prod.s3.amazonaws.com/28b8210c-60cc-4f13-b0b4-5b4f2ad4790b

Documentation: https://matplotlib.org/

### Stem and Leaf Plot - Matplotlib

    import matplotlib.pyplot as plt

    # Split every mark into its stem (mark // 10) and leaf (mark % 10).
    # `marks` must already be defined; assumed to be an iterable of
    # non-negative integers -- TODO confirm.
    stems = []
    leafs = []
    for mark in marks:
        stem, leaf = divmod(mark, 10)
        stems.append(stem)
        leafs.append(leaf)

    # Create the stem plot: one vertical line per mark, positioned at its
    # stem value and rising to its leaf value
    plt.figure(figsize=(12, 8))

    # The line and marker formats are passed by keyword: recent Matplotlib
    # releases no longer accept `markerfmt` as a positional argument.
    plt.stem(stems, leafs, linefmt='-.', markerfmt='o')
    plt.title('Stem and Leaf Plot for Student Marks', fontsize=30)
    plt.ylabel('Leafs', fontsize=20)
    plt.xlabel('Stems', fontsize=20)

    plt.show()
    
### Keras Training/Validation Loss and Accuracy Plots - Matplotlib

    import matplotlib.pyplot as plt

    # --- Loss visualization ---
    # `model_val_dict` must already be defined; presumably the `.history`
    # dict of a Keras training run -- TODO confirm.
    loss_values = model_val_dict['loss']
    val_loss_values = model_val_dict['val_loss']

    # Number the epochs from 1 so the x-axis matches the training log
    epochs = range(1, len(loss_values) + 1)
    plt.plot(epochs, loss_values, 'g', label='Training loss')
    plt.plot(epochs, val_loss_values, 'blue', label='Validation loss')

    plt.title('Training & validation loss')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()
    plt.show()

    # --- Accuracy visualization ---
    # NOTE: some Keras versions store these under 'accuracy' / 'val_accuracy'
    # instead -- adjust the keys if a KeyError is raised.
    acc_values = model_val_dict['acc']
    val_acc_values = model_val_dict['val_acc']

    plt.plot(epochs, acc_values, 'r', label='Training acc')
    plt.plot(epochs, val_acc_values, 'blue', label='Validation acc')
    plt.title('Training & validation accuracy')
    plt.xlabel('Epochs')
    # This figure shows accuracy, so the y-axis is labelled accordingly
    plt.ylabel('Accuracy')
    plt.legend()
    plt.show()
    
### Matplotlib Images

#### Intermediate Activations in CNN

**A model must already be saved in order for this visualization to be used**

    #load saved model
    from keras.models import load_model
    model = load_model('chest_xray_all_with_augmentation_data.h5')
    model.summary()
    
    #load an image
    from keras.preprocessing import image
    import matplotlib.image as mpimg
    import matplotlib.pyplot as plt
    %matplotlib inline

    filename = 'person3_virus_16.jpeg'
    img = image.load_img(filename, target_size=(150, 150))
    plt.imshow(img)
    plt.show()
    
    #transform to tensor and visualize
    import numpy as np

    img_tensor = image.img_to_array(img)
    img_tensor = np.expand_dims(img_tensor, axis=0)

    #Follow the Original Model Preprocessing
    img_tensor /= 255.

    #Check tensor shape
    print(img_tensor.shape)

    #Preview an image
    plt.imshow(img_tensor[0])
    plt.show()
    
    # Plot the feature maps produced by the first layers of the model
    from keras import models
    import math  # used for determining the number of rows in the figure below

    # Only the first 8 layers are visualized. The indexing further down
    # ([0, :, :, channel]) requires each of their outputs to be 4-D.
    visualized_layers = model.layers[:8]

    # Extract the layer outputs and the layer names (used as subplot titles)
    layer_outputs = [layer.output for layer in visualized_layers]
    layer_names = [layer.name for layer in visualized_layers]

    # Create a model that returns every selected layer's output for an input
    activation_model = models.Model(inputs=model.input, outputs=layer_outputs)

    activations = activation_model.predict(img_tensor)

    # One subplot per channel, summed over all visualized layers
    total_features = sum(a.shape[-1] for a in activations)

    n_cols = 16
    n_rows = math.ceil(total_features / n_cols)

    # squeeze=False keeps `axes` two-dimensional even when a single row is
    # enough, so the [row, column] indexing below always works
    fig, axes = plt.subplots(nrows=n_rows, ncols=n_cols,
                             figsize=(n_cols, n_rows * 1.5), squeeze=False)

    iteration = 0  # running index of the next free cell in the grid
    for layer_n, layer_activation in enumerate(activations):
        n_channels = layer_activation.shape[-1]
        for ch_idx in range(n_channels):
            row = iteration // n_cols
            column = iteration % n_cols

            ax = axes[row, column]

            # Work on a copy so the in-place arithmetic below does not
            # modify the arrays stored in `activations`
            channel_image = layer_activation[0, :, :, ch_idx].copy()

            # Post-process the feature to make it visually palatable:
            # centre, scale to unit variance, then map into the 0-255 range
            channel_image -= channel_image.mean()
            channel_std = channel_image.std()
            if channel_std > 0:
                # A constant channel has zero std; skipping the division
                # avoids NaNs and renders the channel as uniform grey
                channel_image /= channel_std
            channel_image *= 64
            channel_image += 128
            channel_image = np.clip(channel_image, 0, 255).astype('uint8')

            ax.imshow(channel_image, aspect='auto', cmap='viridis')
            ax.get_xaxis().set_ticks([])
            ax.get_yaxis().set_ticks([])

            # Title only the first channel of each layer
            if ch_idx == 0:
                ax.set_title(layer_names[layer_n], fontsize=10)
            iteration += 1

    # Hide the grid cells left over when the channel count does not fill
    # the last row
    for unused_ax in axes.ravel()[iteration:]:
        unused_ax.axis('off')

    fig.subplots_adjust(hspace=1.25)
    plt.savefig("Intermediate_Activations_Visualized.pdf")
    plt.show()
    


### Confusion Matrix Function

In [None]:
def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """Print a confusion matrix and draw it as an annotated heat map.

    The plot is drawn with pyplot on the current figure; ``plt.show()`` is
    not called, so the caller decides when to display or save it.

    Parameters
    ----------
    cm : 2-D numpy array
        Confusion matrix. Rows are labelled 'True label' and columns
        'Predicted label' on the plot.
    classes : sequence
        Tick labels; ``len(classes)`` ticks are placed on both axes.
    normalize : bool, default False
        If True, divide every row by its row sum before printing/plotting.
        Cells are then annotated with two decimals instead of integers.
    title : str
        Title of the plot.
    cmap : matplotlib colormap
        Colormap used for the heat map.
    """
    # Imported here so the function works without a module-level import
    import itertools

    # Add Normalization Option
    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    print(cm)

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    # Write each cell's value on top of it; white text on cells above half
    # of the maximum value, black text otherwise
    fmt = '.2f' if normalize else 'd'
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    # Called after the axis labels are set so the layout leaves room for them
    plt.tight_layout()

## Seaborn

Cheat Sheet:  https://s3.amazonaws.com/assets.datacamp.com/blog_assets/Python_Seaborn_Cheat_Sheet.pdf

Documentation: https://seaborn.pydata.org/

**not on cheatsheet**
- two types of objects (FacetGrid, AxesSubplot)
- relplot and catplot are FacetGrids
- check which object by setting the visual to a variable (g=sns.catplot())
- check type with type(g)
- add Title with g.fig.suptitle('Title') for FacetGrid
- add Title with g.set_title for AxesSubplot

### Relplot
> Creates a relational plot that can contain several variables with unique customization

    sns.relplot(x=x, y=y, data=data, kind=['scatter', 'line'], hue='separates colors into this variable', style='sets different point styles to this variable', alpha=.1-1.0, size="sets size of points based on count, setting to 'size' will vary the size by count", col='creates column style subplots based on variable', row='creates row style subplots based on variable', col_order='set col order [list]', row_order='set row order [list]')
    
### Catplot

> Same usecase as relplot, only for categorical data. Differences are:
- kind='count','bar','box', 'point'
- sym='changes style or use of outliers on boxplots'
- ci=None (removes ci marks from barchart)
- whis='can adjust range of whiskers in boxplot'
- order='similar to col_order and row_order'
- join= 'if set to False, will remove joining line in point plot'
- estimator= 'can change the estimation from mean to a numpy estimator (median, std, etc.)'
- capsize= 'places an upper and lower cap on the confidence interval lines with a set length'

### Highlighting Specific Reference Points (scatterplot)

    # Take an independent copy of the Houston rows so a column can be added
    is_houston = pollution.city  ==  'Houston'
    houston_pollution = pollution[is_houston].copy()

    # Largest O3 value observed in Houston
    max_O3 = houston_pollution.O3.max()

    # Label every row: the row(s) matching the maximum versus all the others
    is_peak_day = houston_pollution.O3  ==  max_O3
    houston_pollution['point type'] = ['Highest O3 Day' if is_peak else 'Others'
                                       for is_peak in is_peak_day]

    # Colour the points by the generated label column
    sns.scatterplot(data = houston_pollution,
                    x = 'NO2',
                    y = 'SO2',
                    hue = 'point type')
    plt.show()
    
### KDE comparing one vs rest groups

    # sns.distplot is deprecated in seaborn; kdeplot + rugplot draw the same
    # elements (a KDE curve, plus a rug for the highlighted group).
    is_vandenberg = pollution.city == 'Vandenberg Air Force Base'

    # Missing values are dropped before estimating the densities
    vandenberg_O3 = pollution[is_vandenberg].O3.dropna()
    other_O3 = pollution[~is_vandenberg].O3.dropna()

    # Highlighted group: blue KDE curve so it stands out
    sns.kdeplot(vandenberg_O3,
                label = 'Vandenberg',
                color = 'steelblue')

    # Rug marks showing the individual Vandenberg observations
    sns.rugplot(vandenberg_O3,
                color = 'steelblue')

    # All remaining cities pooled together: gray KDE curve, no rug
    sns.kdeplot(other_O3,
                label = 'Other cities',
                color = 'gray')

    plt.xlabel('O3')
    # Show the legend so the two curve labels are actually visible
    plt.legend()
    plt.show()
    
### Beeswarm Plot for comparing multiple classes in one time period

    # Keep only the rows whose month is 3 (March)
    is_march = pollution.month == 3
    pollution_mar = pollution[is_march]

    # Beeswarm with O3 on the x-axis and one band per city on the y-axis;
    # the small marker size keeps the points from crowding each other
    sns.swarmplot(data = pollution_mar,
                  x = 'O3',
                  y = "city",
                  size = 3)

    # Descriptive title, then render
    plt.title('March Ozone levels by city')
    plt.show()
    
### Annotate Plots with arrow and custom text placement

    # Query and filter to New Years in Long Beach
    jan_pollution = pollution.query("(month  ==  1) & (year  ==  2012)")
    lb_newyears = jan_pollution.query("(day  ==  1) & (city  ==  'Long Beach')")

    sns.scatterplot(x = 'CO', y = 'NO2',
                    data = jan_pollution)

    # Extract plain scalar coordinates for the annotated point. Passing the
    # filtered Series themselves relies on implicit Series-to-float
    # conversion, which pandas has deprecated.
    # Assumes the filter matches at least one row; the first match is used.
    arrow_x = lb_newyears.CO.iloc[0]
    arrow_y = lb_newyears.NO2.iloc[0]

    # Point arrow to lb_newyears & place text in lower left
    plt.annotate('Long Beach New Years',
                 xy = (arrow_x, arrow_y),
                 xytext = (2, 15),
                 # Shrink the arrow to avoid occlusion
                 arrowprops = {'facecolor':'gray', 'width': 3, 'shrink': 0.03},
                 backgroundcolor = 'white')
    plt.show()
    
### Bootstrap Histogram Confidence

    # NO2 readings for Cincinnati in month 5 (May)
    cinci_may = pollution.query("city  ==  'Cincinnati' & month  ==  5")
    cinci_may_NO2 = cinci_may.NO2

    # Draw the bootstrap samples; `bootstrap` is a helper defined elsewhere,
    # called here with the data and the value 1000
    boot_means = bootstrap(cinci_may_NO2, 1000)

    # The 2.5th and 97.5th percentiles bound the central 95% interval
    interval_bounds = np.percentile(boot_means, [2.5, 97.5])
    lower, upper = interval_bounds

    # Shade the interval as a translucent gray vertical band
    plt.axvspan(lower, upper, color = 'gray', alpha = 0.2)

    # Histogram of the bootstrap results (100 bins, no KDE curve)
    sns.distplot(boot_means, bins = 100, kde = False)

    plt.show()
    
### Swarmplot Bootstrap

    # Collect one DataFrame of bootstrap results per city and concatenate
    # them once at the end. This avoids re-copying the accumulated frame on
    # every iteration and avoids concatenating onto an empty DataFrame.
    boot_frames = []

    for city in ['Cincinnati', 'Des Moines', 'Indianapolis', 'Houston']:
        # Filter to city
        city_NO2 = pollution_may[pollution_may.city  ==  city].NO2
        # Bootstrap city data & put in DataFrame; the scalar `city` is
        # broadcast to every row of the frame
        cur_boot = pd.DataFrame({'NO2_avg': bootstrap(city_NO2, 100), 'city': city})
        boot_frames.append(cur_boot)

    # Stack all cities' bootstraps (each frame keeps its own row index)
    city_boots = pd.concat(boot_frames)

    # Beeswarm plot of averages with cities on y axis
    sns.swarmplot(y = "city", x = "NO2_avg", data = city_boots, color = 'coral')

    plt.show()

## Bokeh

## Folium Interactive Maps

https://python-visualization.github.io/folium/
    
    import folium

    # Coordinates of the map centre
    lat, long = 51.51, -0.14

    # Build the map around that centre at zoom level 13 and display it
    base_map = folium.Map(location=[lat, long], zoom_start=13)
    base_map
    
    import numpy as np

    # Generate 20 random locations within +/-0.1 of the map centre (lat, long)
    x = [lat + np.random.uniform(-.1, .1) for _ in range(20)]
    y = [long + np.random.uniform(-.1, .1) for _ in range(20)]
    points = list(zip(x, y))

    # The loop variables are deliberately not named lat/long: reusing those
    # names would overwrite the map centre, so re-running this cell would
    # scatter the points around the last random point instead.
    for point_lat, point_long in points:
        marker = folium.Marker(location=[point_lat, point_long])
        marker.add_to(base_map)
    base_map
    
    # Add a marker with a popup box showing the coordinates at every point.
    # NOTE: if plain markers were already added for these points, this
    # places a second marker on top of each one.
    # The loop variables are not named lat/long so the map-centre variables
    # are left untouched.
    for point_lat, point_long in points:
        popup_text = "Latitude: {}, Longitude: {}".format(point_lat, point_long)
        popup = folium.Popup(popup_text, parse_html=True)
        marker = folium.Marker(location=[point_lat, point_long], popup=popup)
        marker.add_to(base_map)
    base_map
    
### Converting geodataframe data to folium points for plotting    
    
    # Inspect the first rows of urban_polygon
    print(urban_polygon.head())

    # Take the entry labelled 0 of the `center` column as the urban centre
    urban_center = urban_polygon.center[0]
    print(urban_center)

    # folium wants [lat, lng] while the point stores (lng, lat) as (x, y),
    # so the coordinates are swapped when building the location list
    urban_location = [urban_center.y, urban_center.x]
    print(urban_location)

    # Build folium_loc the same way, again from the polygon's centre point
    point = urban_polygon.center[0]
    center_lng, center_lat = point.x, point.y
    folium_loc = [center_lat, center_lng]

    # Construct the map around folium_loc: downtown_map
    downtown_map = folium.Map(location = folium_loc, zoom_start = 15)

    # Draw the neighborhood outline (Urban Residents) on the map
    neighborhood_layer = folium.GeoJson(urban_polygon.geometry)
    neighborhood_layer.add_to(downtown_map)

    # Display the map
    display(downtown_map)

## Choropleth

### Folium 

**arguments**
- geo_data - the source data for the polygons (geojson file or a GeoDataFrame)
- name - the name of the layer, as it will appear in the LayerControl (not the name of the geometry column)
- data- the source DataFrame or Series for the normalized data
- columns- a list of columns: one that corresponds to the polygons and one that has the value to plot
- key_on - a GeoJSON variable to bind the data to (always starts with feature)
- fill_color - polygon fill color (defaults to blue)
- fill_opacity - range between 0 (transparent) and 1 (completely opaque)
- line_color - color of polygon border lines (defaults to black)
- line_opacity - range between 0 (transparent) and 1 (completely opaque)
- legend_name - creates a title for the legend

#### Folium Choropleth with Markers and Popups

    # Center point for Nashville
    nashville = [36.1636, -86.7823]

    # Create map
    m = folium.Map(location=nashville, zoom_start=10)

    # Build choropleth. The folium.Choropleth class is used instead of the
    # Map.choropleth method, which is deprecated and no longer available in
    # current folium releases. All arguments are unchanged.
    folium.Choropleth(
        geo_data=districts_and_permits,
        name='geometry',
        data=districts_and_permits,
        columns=['district', 'permit_density'],
        key_on='feature.properties.district',
        fill_color='Reds',
        fill_opacity=0.5,
        line_opacity=1.0,
        legend_name='2017 Permitted Building Projects per km squared',
    ).add_to(m)

    # Create LayerControl and add it to the map
    folium.LayerControl().add_to(m)

    # Display the map
    display(m)
    
    # Store every district's centroid in a new `center` column
    districts_and_permits['center'] = districts_and_permits.geometry.centroid

    # Add one marker with a text popup per district
    for _, row_values in districts_and_permits.iterrows():
        center_point = row_values['center']
        # folium locations are [lat, lng], i.e. [y, x] of the centroid
        location = [center_point.y, center_point.x]

        district_text = 'Council District: ' + str(row_values['district'])
        permits_text = 'permits issued: ' + str(row_values['bldg_permits'])
        popup = district_text + ';  ' + permits_text

        folium.Marker(location = location, popup = popup).add_to(m)

    # Display the map again, now with the markers
    display(m)