# Firebase Testing with Python

This is the code that we hope to host on PythonAnywhere to communicate with the Firebase servers, mainly to fetch CSVs to be used for analysis.

Please run the imports only once; there are issues with reinitializing the Firebase app.

In [15]:
import firebase_admin
from firebase_admin import credentials, firestore
from firebase_admin import storage

# Initialise the Firebase app only if no app has been registered yet, so that
# re-running this cell does not try to initialise it a second time.
if not firebase_admin._apps:
    cred = credentials.Certificate('mlforall-admin-sdk.json')
    firebase_admin.initialize_app(cred, {
        'storageBucket': 'mlforall-14bf7.appspot.com'
    })
bucket = storage.bucket()
# firestore.client is a factory function: it has to be called to get a client
# object (previously the function itself was bound to db).
db = firestore.client()

# 'bucket' is an object defined in the google-cloud-storage Python library.
# See https://googlecloudplatform.github.io/google-cloud-python/latest/storage/buckets.html
# for more details.

## Reading CSV From Storage

Some stats stuff that we'll need

In [2]:
import pandas as pd
import numpy as np
import pickle
from io import StringIO

Modified from GCS Documentation, How to Download Files https://cloud.google.com/storage/docs/downloading-objects#code-samples

In [3]:
def get_blob(bucket, source_blob_name):
    """Download a CSV object from storage and parse it into a DataFrame.

    Args:
        bucket: Storage bucket object exposing ``blob(name)``.
        source_blob_name: Path of the CSV object inside the bucket.

    Returns:
        The DataFrame produced by ``pd.read_csv`` on the downloaded contents,
        decoded as UTF-8.
    """
    raw = bucket.blob(source_blob_name).download_as_string()
    # read_csv is given a file-like object, so wrap the decoded text in one.
    text_buffer = StringIO(str(raw, encoding='utf-8'))
    return pd.read_csv(text_buffer)

My Tests

In [4]:
def make_path(uid, project_title, file_name):
    """Return the storage path ``<uid>/<project_title>/<file_name>``."""
    return "/".join((uid, project_title, file_name))

In [5]:
# The three pieces of the storage path used for the test download.
uid = "UDjMojFqWHOdW0fCIJPMNPScQ9p1"
# Alternative test dataset (Spotify); swap with the Pokemon pair below to use it.
# project_title = "Spotify"
# file_name = "simple_top50.csv"
project_title = "Pokemon"
file_name = "Pokemon.csv"

In [6]:
# Download the CSV at <uid>/<project_title>/<file_name> into a DataFrame and
# preview its first rows.
df = get_blob(bucket, make_path(uid, project_title, file_name))
df.head()

Unnamed: 0,#,Name,Type 1,Type 2,Total,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
0,1,Bulbasaur,Grass,Poison,318,45,49,49,65,65,45,1,False
1,2,Ivysaur,Grass,Poison,405,60,62,63,80,80,60,1,False
2,3,Venusaur,Grass,Poison,525,80,82,83,100,100,80,1,False
3,3,VenusaurMega Venusaur,Grass,Poison,625,80,100,123,122,120,80,1,False
4,4,Charmander,Fire,,309,39,52,43,60,50,65,1,False


## Uploading Pickles to Storage

First let's make some models

In [7]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

In [8]:
def build_logistic_regression(X, y):
    """Fit a LogisticRegression with default settings on (X, y) and return it."""
    # fit() returns the estimator itself, so construction and fitting chain.
    return LogisticRegression().fit(X, y)

def build_and_pickle(df, target_parameter, df_variables, pickle_name, debug=False):
    """Train a logistic-regression classifier on ``df`` and return it pickled.

    Args:
        df: DataFrame holding both the feature columns and the label column.
            It is left unmodified, so the call can be repeated on the same
            frame.
        target_parameter: Name of the column containing the labels.
        df_variables: Names of the columns to use as features. Names that are
            not columns of ``df`` are ignored; the features keep the column
            order of ``df``.
        pickle_name: Currently unused; kept so existing callers keep working.
        debug: Currently unused; kept so existing callers keep working.

    Returns:
        bytes: ``pickle.dumps`` of a fitted pipeline made of a MinMaxScaler
        followed by a LogisticRegression. Because the scaler is stored with
        the classifier, ``predict`` on the unpickled object takes raw
        (unscaled) feature values.
    """
    # Local import: the file-level imports do not include sklearn.pipeline.
    from sklearn.pipeline import make_pipeline

    y = df[target_parameter]
    # Select the feature columns into a new frame instead of dropping the
    # other columns from the caller's DataFrame in place.
    feature_cols = [col for col in df.columns if col in df_variables]
    X = df[feature_cols]

    # Same split as before (random_state=0); only the training part is used.
    X_train, _, y_train, _ = train_test_split(X, y, random_state=0)

    # Scaling and classification are fitted together so that the scaling
    # learned here is re-applied automatically at prediction time. Plain
    # arrays are passed so that predicting from plain lists matches the
    # input type used for fitting.
    model = make_pipeline(MinMaxScaler(), LogisticRegression())
    model.fit(X_train.values, y_train)

    return pickle.dumps(model)

def update_firestore():
    """Placeholder: this function was declared without a body.

    NOTE(review): presumably meant to write results to Firestore; the
    intended behaviour still has to be defined.

    Raises:
        NotImplementedError: Always, until an implementation is added.
    """
    raise NotImplementedError("update_firestore is not implemented yet")

Some testing info

In [9]:
# Alternative test configuration for the Spotify dataset:
# target = 'Genre'
# variables = ['Beats.Per.Minute', 'Energy', 'Danceability','Loudness..dB..', 'Liveness', 'Valence.', 'Length.']

# Label column and feature columns for the Pokemon dataset.
target = 'Type 1'
variables = ['HP', 'Attack', 'Defense','Speed']

# The pickle is stored next to the CSV, under <uid>/<project_title>/<pickle_name>.
pickle_name = "log_reg"
path = make_path(uid, project_title, pickle_name)
# Pass pickle_name (not project_title) for the pickle_name parameter.
pickle_bytes = build_and_pickle(df, target, variables, pickle_name, debug=True)

## Pickle Functions

In [10]:
#from joblib import dump, load

def p_send_blob(bucket, pickle_bytes, pickle_path):
    """Upload ``pickle_bytes`` to the object at ``pickle_path`` in the bucket.

    Args:
        bucket: Storage bucket object exposing ``blob(name)``.
        pickle_bytes: The data to upload.
        pickle_path: Destination path of the object inside the bucket.
    """
    bucket.blob(pickle_path).upload_from_string(pickle_bytes)

def p_get_blob(bucket, pickle_path):
    """Download the object at ``pickle_path`` and return it unpickled.

    Args:
        bucket: Storage bucket object exposing ``blob(name)``.
        pickle_path: Path of the pickled object inside the bucket.

    Returns:
        The Python object reconstructed by ``pickle.loads``.

    NOTE: unpickling can execute arbitrary code, so this should only be used
    on objects this project uploaded itself.
    """
    payload = bucket.blob(pickle_path).download_as_string()
    return pickle.loads(payload)

Send our data to the storage

In [11]:
p_send_blob(bucket, pickle_bytes, path)

Get the data back

In [12]:
def floatCast(num):
    """Convert ``num`` to a float with the built-in ``float``."""
    converted = float(num)
    return converted

def load_and_predict(model, prediction_variables):
    """Predict the label of a single observation.

    Args:
        model: Object exposing ``predict``.
        prediction_variables: Iterable of feature values; each one is
            converted with ``floatCast``.

    Returns:
        The first element of what ``model.predict`` returns.
    """
    features = [floatCast(value) for value in prediction_variables]
    # predict() is given a list holding the single feature row.
    predictions = model.predict([features])
    return predictions[0]

In [13]:
model = p_get_blob(bucket,path)
# X_predict = [120,64,70,-5,14,54,200] # spotify
X_predict = [40, 10, 20, 80] # pokemon

load_and_predict(model, X_predict)

'Psychic'

# Kaggle Downloads

In [49]:
from kaggle.api.kaggle_api_extended import KaggleApi
# Create the Kaggle API client and authenticate it.
api = KaggleApi()
api.authenticate()



Code taken / modified from https://github.com/Kaggle/kaggle-api/blob/master/kaggle/api/kaggle_api_extended.py and https://github.com/Kaggle/kaggle-api/blob/master/kaggle/api/kaggle_api.py and https://github.com/Kaggle/kaggle-api/blob/3ce5046d42d55b951ad301053e002930cead1cbc/kaggle/api/kaggle_api.py#L1506

In [2]:
def getSlugs(dataset):
    """Split a dataset reference into its owner and dataset slugs.

    Args:
        dataset: String in the format ``[owner]/[dataset-name]``.

    Returns:
        An ``(owner_slug, dataset_slug)`` tuple made of the first two
        ``/``-separated parts, or ``None`` when ``dataset`` has no ``/``.
    """
    if '/' not in dataset:
        return None
    # A '/' is present, so the split yields at least two parts; any further
    # parts are ignored.
    owner_slug, dataset_slug = dataset.split('/')[:2]
    return owner_slug, dataset_slug

In [3]:
# Split an example dataset reference into its owner and dataset slugs.
owner, dataset = getSlugs("avenn98/world-of-warcraft-demographics")
# Download experiments, currently disabled:
# thread = api.datasets_download(owner, dataset)
#result = thread.get()
#downloads locally
#thread = api.dataset_download_files('avenn98/world-of-warcraft-demographics')
#response = thread.get()

# Dataworld Downloads

In [100]:
import datadotworld as dw

# helpful to use dir() !

def loadDf(path):
    """Load the data.world dataset at ``path`` and return its first dataframe.

    Args:
        path: Dataset path passed to ``dw.load_dataset``.

    Returns:
        Whatever the loader of the first entry in the dataset's dataframes
        returns.
    """
    # NOTE(review): relies on private attributes (_dict, _loader_func) of the
    # datadotworld objects - confirm they exist in the installed version.
    lazy_frames = dw.load_dataset(path).dataframes._dict
    first_entry = list(lazy_frames.values())[0]
    return first_entry._loader_func()

def getPath(url):
    """Return ``url`` with every ``https://data.world/`` occurrence removed.

    A string that does not contain that text is returned unchanged.
    """
    site_root = "https://data.world/"
    return url.replace(site_root, "")

def getDf(url):
    """Load a dataframe for a data.world dataset given its URL or path.

    The URL is reduced to a dataset path with ``getPath`` and the result is
    handed to ``loadDf``.
    """
    dataset_path = getPath(url)
    return loadDf(dataset_path)

### Example paths:
(From https://data.world/datasets/open-data)
- jonloyens/an-intro-to-dataworld-dataset
- https://data.world/dcopendata/swimming-pools
- https://data.world/makeovermonday/2019w51

In [101]:
# Load an example data.world dataset by URL and preview its first rows.
df = getDf("https://data.world/makeovermonday/2019w51")
df.head()

Unnamed: 0,season,rank,team,g,w,l,def_rtg,rtg_vs_league_avg
0,1996-97,1.0,Miami Heat,82.0,61.0,21.0,99.2,0.944948
1,1996-97,2.0,New York Knicks,82.0,57.0,25.0,99.5,0.947806
2,1996-97,3.0,Atlanta Hawks,82.0,56.0,26.0,100.3,0.955426
3,1996-97,4.0,Chicago Bulls,82.0,69.0,13.0,100.7,0.959237
4,1996-97,5.0,Cleveland Cavaliers,82.0,42.0,40.0,100.8,0.960189
