In [None]:
! pip install elasticsearch --quiet

In [None]:
!pip install pandas

In [None]:
! pip install firebase-admin faker --quiet

In [None]:
from collections import Counter
from datetime import datetime

import pandas as pd

# Load Firebase data

In [None]:
import firebase_admin
from firebase_admin import credentials, firestore
from faker import Faker

In [None]:
# Load the service-account key from the local file 'serviceAccount.json'
# and initialise the Firebase Admin SDK with it.
cred = credentials.Certificate('serviceAccount.json')
firebase_admin.initialize_app(cred)

In [None]:
# Open a Firestore client and show the ids of all top-level collections.
db = firestore.client()
collections = [collection.id for collection in db.collections()]
print(collections)

In [None]:
def _load_collection(collection_name):
    """Fetch every document of the Firestore collection `collection_name` as a list of dicts."""
    return [document.to_dict() for document in db.collection(collection_name).get()]


# Each collection is read in full and converted to plain dictionaries.
master_classes = _load_collection("Masterclasses")
users = _load_collection("Users")
coaches = _load_collection("Coaches")
events = _load_collection("Events")
blogs = _load_collection("Blogs")
snax = _load_collection("Snax")
organizations = _load_collection("Orga")

# Filter out Firebase fields

### Users

In [None]:
# Fields dropped from every user record; keys that are absent are ignored.
USER_FIELDS_TO_DROP = ('email', 'firstName', 'lifetime', 'refACID', 'role', 'surname')

for user in users:
    for field in USER_FIELDS_TO_DROP:
        user.pop(field, None)

### Coaches

In [None]:
# Fields dropped from every coach record; keys that are absent are ignored.
COACH_FIELDS_TO_DROP = (
    'about', 'blogs', 'books', 'facebook', 'image',
    'instagram', 'link', 'linkedin', 'podcasts',
)

for coach in coaches:
    for field in COACH_FIELDS_TO_DROP:
        coach.pop(field, None)

### Masterclasses

In [None]:
# Fields dropped from every masterclass record; keys that are absent are ignored.
MASTERCLASS_FIELDS_TO_DROP = (
    'courseLessons', 'courseOverview', 'Description',
    'filterTags', 'Progress', 'videoURL',
)

for master_class in master_classes:
    for field in MASTERCLASS_FIELDS_TO_DROP:
        master_class.pop(field, None)

In [None]:
from enum import Enum

class Event_Type(Enum):
    """Kinds of events found in the `events` records.

    The metric cells compare an event's 'Event-Type' value against the member
    *name* (e.g. ``Event_Type.ENROLL_COURSE.name``), not the integer value.
    """
    # Course-related events
    ENROLL_COURSE = 0
    SAVE_COURSE = 1
    UNSAVE_COURSE = 2
    COMPLETE_COURSE = 3
    COMPLETE_LESSON = 4
    # Session events
    LOGIN = 5
    LOGOUT = 6
    # presumably "advisor contact" (German) — TODO confirm
    BERATER_KONTAKT = 7
    # Blog and snack bookmarks
    SAVE_BLOG = 8
    UNSAVE_BLOG = 9
    SAVE_SNAC = 10
    UNSAVE_SNAC = 11
    

# Calculate additional metrics

# Write data to elastic

We will recalculate aggregate metrics

### Users

Total Watchtime

In [None]:
# Total watchtime
# Iterate through each user and sum the leading integer of the 'progress'
# string of every entry in the user's 'Watched' list.
for user in users:
    user['TotalWatchtime'] = sum(
        int(entry['progress'].split(" ")[0]) for entry in user['Watched']
    )

Courses enrolled

In [None]:
# Count ENROLL_COURSE events per user id in a single pass over `events`,
# instead of rescanning all events once for every user.
enrollments_by_user = Counter(
    event['User-Id']
    for event in events
    if event['Event-Type'] == Event_Type.ENROLL_COURSE.name
)

current_year = datetime.today().year
for user in users:
    # Age is a plain difference of calendar years (the birthday within the year
    # is ignored). Assumes 'Birthdate' starts with the year, e.g. "YYYY/..." — TODO confirm.
    user['Age'] = current_year - int(user['Birthdate'].split("/")[0])
    # A Counter returns 0 for users without any matching event.
    user['Courses_enrolled'] = enrollments_by_user[user['id']]

Courses completed

In [None]:
# Count COMPLETE_COURSE events per user id in a single pass over `events`,
# instead of rescanning all events once for every user.
completions_by_user = Counter(
    event['User-Id']
    for event in events
    if event['Event-Type'] == Event_Type.COMPLETE_COURSE.name
)

for user in users:
    # A Counter returns 0 for users without any matching event.
    user['Courses_completed'] = completions_by_user[user['id']]

Courses saved

In [None]:
# Count SAVE_COURSE events per user id in a single pass over `events`,
# instead of rescanning all events once for every user.
saves_by_user = Counter(
    event['User-Id']
    for event in events
    if event['Event-Type'] == Event_Type.SAVE_COURSE.name
)

for user in users:
    # A Counter returns 0 for users without any matching event.
    user['Courses_saved'] = saves_by_user[user['id']]

<span style="color:#BAE1FF">Adding geopoint as mapping to index users</span>

In [82]:
# Index definition for `users_all`: a single shard, with `geo.location`
# mapped as a geo_point.
mappings = {
    "settings": {
        "number_of_shards": 1
    },
    "mappings": {
        "properties": {
            "geo": {
                "properties": {
                    "location": {
                        "type": "geo_point"
                    }
                }
            }
        }
    }
}

# NOTE(review): `client` is created in the "Move data to elastic" cell further
# down; that cell has to run before this one on a fresh kernel.
# Creating an index that already exists fails, so only create it when it is
# missing — this keeps the cell safe to re-run.
if not client.indices.exists(index='users_all'):
    client.indices.create(index='users_all', body=mappings)

# Plain marker string that a later cell prints; it is not used as a credential here.
SECRET="Majd ist der Beste"

<span style="color:#BAE1FF">Adding the locations from the csv to each user</span>

In [None]:
geotable = 'plz_geocoord.csv'

# Lookup table indexed by postal code ('plz') with 'lat' and 'lng' columns.
df = pd.read_csv(geotable)
df.set_index('plz', inplace=True)

for user in users:
    raw_postal_code = user.get('postalCode')

    # Normalise to int BEFORE the membership test: the lookup below uses the
    # integer form, so testing the raw value would reject every postal code
    # that is stored as a string.
    try:
        postalcode = int(str(raw_postal_code))
    except ValueError:
        postalcode = None

    if postalcode is None or postalcode not in df.index:
        # Report postal codes that are missing, non-numeric or not in the table.
        print(raw_postal_code)
        continue

    row = df.loc[postalcode]
    # "lat,lng" string, stored under geo.location.
    user['geo'] = {'location': str(row['lat']) + "," + str(row['lng'])}



### Events

In [None]:
# Break each event's 'Timestamp' (format YYYY/MM/DD) into separate
# Year / Month / Day fields.
for event in events:
    parsed = datetime.strptime(event['Timestamp'], '%Y/%m/%d')
    event['Year'], event['Month'], event['Day'] = parsed.year, parsed.month, parsed.day

Average session duration (maybe remove)

It is important to note that here we can calculate additional field metrics

In [None]:
# NOTE(review): looks like a debugging leftover — SECRET is assigned in the
# cell that creates the `users_all` index; confirm whether this is still needed.
print(SECRET)

# Move data to elastic

In [None]:

import os

from elasticsearch import Elasticsearch

# Credentials must not be stored in the notebook. They are read from the
# environment instead; set both variables before starting the kernel.
# NOTE(review): the values that used to be hardcoded here should be treated as
# leaked and rotated.
# Cloud ID: found in the 'Manage this deployment' page
CLOUD_ID = os.environ["ELASTIC_CLOUD_ID"]
# API key: found in the 'Management' page under the section 'Security'
API_KEY = os.environ["ELASTIC_API_KEY"]

# Create the client instance
client = Elasticsearch(
    cloud_id=CLOUD_ID,
    api_key=API_KEY,
)
          

<span style="color:#BAE1FF">Add users</span>

In [83]:
# Write every user into `users_all`, using the user's 'id' as document id.
for user_doc in users:
    client.index(index="users_all", id=user_doc["id"], body=user_doc)


<span style="color:#BAE1FF">Add master classes</span>

In [84]:
# Write every masterclass into `masterclasses`, using its 'id' as document id.
for masterclass_doc in master_classes:
    client.index(index="masterclasses", id=masterclass_doc["id"], body=masterclass_doc)

<span style="color:#BAE1FF">Add blogs</span>

In [85]:
# Write every blog into `blogs`, using its 'id' as document id.
for blog_doc in blogs:
    client.index(index="blogs", id=blog_doc["id"], body=blog_doc)

<span style="color:#BAE1FF">Add snax</span>

In [86]:
# Write every snack into `snax`, using its 'id' as document id.
for snack_doc in snax:
    client.index(index="snax", id=snack_doc["id"], body=snack_doc)

<span style="color:#BAE1FF">Add coaches</span>

In [87]:
# Write every coach into `coaches`, using its 'id' as document id.
for coach_doc in coaches:
    client.index(index="coaches", id=coach_doc["id"], body=coach_doc)

<span style="color:#BAE1FF">Add events</span>

In [88]:
# Write every event into `events`, using its 'Event-ID' as document id.
for event_doc in events:
    client.index(index="events", id=event_doc["Event-ID"], body=event_doc)


<h1 style="color:red">Deleting indices</h1>

In [None]:
#client.options(ignore_status=[400,404]).indices.delete(index='users_all')
#client.options(ignore_status=[400,404]).indices.delete(index='masterclasses')
#client.options(ignore_status=[400,404]).indices.delete(index='blogs')
#client.options(ignore_status=[400,404]).indices.delete(index='snax')
#client.options(ignore_status=[400,404]).indices.delete(index='coaches')
#client.options(ignore_status=[400,404]).indices.delete(index='events')

# Creating Orga Indices

In [None]:
def get_schulen_users():
    """Return the users whose "Education" field equals "Schule".

    Reads the module-level `users` list and returns a new list.
    """
    return [candidate for candidate in users if candidate["Education"] == "Schule"]

def get_orga_users(orga_name):
    """Return the users whose "advisorACID" field equals `orga_name`.

    Reads the module-level `users` list and returns a new list.
    """
    return [candidate for candidate in users if candidate["advisorACID"] == orga_name]


In [None]:
# Index definition shared by the per-group user indices: a single shard,
# with `geo.location` mapped as a geo_point.
_geo_location_field = {"type": "geo_point"}

mappings = {
    "settings": {"number_of_shards": 1},
    "mappings": {
        "properties": {
            "geo": {
                "properties": {"location": _geo_location_field},
            },
        },
    },
}



def create_index(users, index_name):
    """Create `index_name` if it is missing and index every given user into it.

    The index is created with the module-level `mappings`. Creating an index
    that already exists fails, so creation is skipped when the index is
    already there; this makes the call safe to repeat.

    Args:
        users: iterable of user dicts; each needs an 'id' key, which is used
            as the document id.
        index_name: name of the Elasticsearch index to write to.
    """
    if not client.indices.exists(index=index_name):
        client.indices.create(index=index_name, body=mappings)
    for user in users:
        client.index(index=index_name, body=user, id=user['id'])


# Für Schüler

In [None]:
# Collect the users selected by get_schulen_users() and index them into
# their own `users_schueler` index.
schueler = get_schulen_users()
create_index(schueler, "users_schueler")

# Für Organisationen

In [None]:
# Minimum number of users an organization needs to get its own index.
threshold = 10

for orga in organizations:
    name = orga["name"]
    # Organizations with "Schule" in their name are skipped here.
    if "Schule" not in name:
        people = get_orga_users(name)
        if len(people) >= threshold:
            # Index name: "users_" + lower-cased name with spaces turned into underscores.
            create_index(people, "users_" + name.lower().replace(" ", "_"))
        else:
            # Too few users: only report the organization's name.
            print(name)
