In [1]:
! pip install elasticsearch --quiet


[notice] A new release of pip is available: 23.1.2 -> 23.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [3]:
! pip install firebase-admin faker --quiet


[notice] A new release of pip is available: 23.1.2 -> 23.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip


# Load Firebase data

In [4]:
import os
from collections import Counter

import firebase_admin
from faker import Faker
from firebase_admin import credentials, firestore

In [5]:
# Load the service-account key and obtain the default Firebase app.
# initialize_app() raises ValueError when the default app already exists, so
# re-running this cell in a live kernel reuses the existing app instead of failing.
cred = credentials.Certificate('serviceAccount.json')
try:
    firebase_app = firebase_admin.get_app()
except ValueError:
    firebase_app = firebase_admin.initialize_app(cred)
firebase_app

<firebase_admin.App at 0x25fe7b9a800>

In [53]:
# Open a Firestore client and print the ids of its top-level collections.
db = firestore.client()
collections = list(map(lambda collection_ref: collection_ref.id, db.collections()))
print(collections)

['Coaches', 'Events', 'Masterclasses', 'Users']


In [54]:
# Fetch every document of the four collections (in the order Masterclasses,
# Users, Coaches, Events) and convert each one to a plain dict with to_dict().
master_class_docs = db.collection("Masterclasses").get()
user_docs = db.collection("Users").get()
coach_docs = db.collection("Coaches").get()
event_docs = db.collection("Events").get()

master_classes = list(map(lambda doc: doc.to_dict(), master_class_docs))
users = list(map(lambda doc: doc.to_dict(), user_docs))
coaches = list(map(lambda doc: doc.to_dict(), coach_docs))
events = list(map(lambda doc: doc.to_dict(), event_docs))

# Filter out Firebase fields

### Users

In [55]:
# Number of user records loaded from the Users collection.
len(users)

100

In [56]:
# Drop email, firstName, lifetime, refACID, role and surname from every user
# record; a field that is absent is skipped silently.
for user in users:
    for field_name in ('email', 'firstName', 'lifetime', 'refACID', 'role', 'surname'):
        user.pop(field_name, None)

### Coaches

In [57]:
# Drop about, blogs, books, facebook, image, instagram, link, linkedin and
# podcasts from every coach record; a field that is absent is skipped silently.
for coach in coaches:
    for field_name in (
        'about',
        'blogs',
        'books',
        'facebook',
        'image',
        'instagram',
        'link',
        'linkedin',
        'podcasts',
    ):
        coach.pop(field_name, None)

### Masterclasses

In [59]:
# Drop courseLessons, courseOverview, Description, filterTags, Progress and
# videoURL from every masterclass record; a field that is absent is skipped silently.
for master_class in master_classes:
    for field_name in (
        'courseLessons',
        'courseOverview',
        'Description',
        'filterTags',
        'Progress',
        'videoURL',
    ):
        master_class.pop(field_name, None)

# Calculate additional metrics

# Write data to elastic

We recalculate the aggregate metrics for each user before the data is written to Elasticsearch.

### Users

Total Watchtime

In [60]:
# Inspect the "Watched" entries of one user to see what the records look like.
users[1]["Watched"]

[{'Masterclass': 30, 'progress': '260 min'},
 {'Masterclass': 61, 'progress': '155 min'},
 {'Masterclass': 5, 'progress': '386 min'},
 {'Masterclass': 72, 'progress': '149 min'},
 {'Masterclass': 47, 'progress': '416 min'}]

In [66]:
# Total watch time
# Iterate through each user and sum the leading number of every "progress"
# string (e.g. '260 min') in the user's "Watched" list; the result is stored
# under 'TotalWatchtime'.
# A user whose "Watched" field is missing or empty gets 0 instead of raising
# KeyError, and split() without an argument tolerates extra whitespace.
for user in users:
    watched_entries = user.get('Watched') or []
    user['TotalWatchtime'] = sum(
        int(entry['progress'].split()[0]) for entry in watched_entries
    )

Courses enrolled

In [67]:
# Courses enrolled
# Count the 'enroll_course' events per user id in one pass over the events,
# then look each user up, instead of rescanning every event for every user.
enrollments_by_user = Counter(
    event['User-Id'] for event in events if event['Event-Type'] == 'enroll_course'
)
for user in users:
    # A Counter returns 0 for ids that have no matching event.
    user['Courses_enrolled'] = enrollments_by_user[user['id']]

Courses completed

In [68]:
# Courses completed
# Count the 'complete_course' events per user id in one pass over the events,
# then look each user up, instead of rescanning every event for every user.
completions_by_user = Counter(
    event['User-Id'] for event in events if event['Event-Type'] == 'complete_course'
)
for user in users:
    # A Counter returns 0 for ids that have no matching event.
    user['Courses_completed'] = completions_by_user[user['id']]

Courses saved

In [69]:
# Courses saved
# Count the 'save_course' events per user id in one pass over the events,
# then look each user up, instead of rescanning every event for every user.
saves_by_user = Counter(
    event['User-Id'] for event in events if event['Event-Type'] == 'save_course'
)
for user in users:
    # A Counter returns 0 for ids that have no matching event.
    user['Courses_saved'] = saves_by_user[user['id']]

Average session duration (TODO: not calculated yet; maybe remove)

Note: additional per-user metrics can be calculated at this point.

# Move data to elastic

In [71]:

from elasticsearch import Elasticsearch

# Connection settings are read from the environment so that no secret is
# stored in the notebook. Set ELASTIC_CLOUD_ID and ELASTIC_API_KEY before
# starting the kernel; a missing variable raises KeyError here.
# NOTE(review): credentials that were ever committed in a notebook should be
# revoked and re-issued.
# Cloud ID: found in the 'Manage this deployment' page
CLOUD_ID = os.environ["ELASTIC_CLOUD_ID"]
# API key: found in the 'Management' page under the section 'Security'
API_KEY = os.environ["ELASTIC_API_KEY"]
# Create the client instance
client = Elasticsearch(
    cloud_id=CLOUD_ID,
    api_key=API_KEY,
)
          

In [73]:
# Write users to Elasticsearch.
# Each user is indexed under its own id, so re-running this cell overwrites the
# existing documents instead of appending duplicates. `document=` replaces the
# deprecated `body=` parameter of Elasticsearch.index().
for user in users:
    client.index(index='users', id=str(user['id']), document=user)