In [1]:
! pip install elasticsearch --quiet


[notice] A new release of pip is available: 23.1.2 -> 23.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [3]:
! pip install firebase-admin faker --quiet


[notice] A new release of pip is available: 23.1.2 -> 23.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip


# Load Firebase data

In [2]:
import os
from collections import Counter

import firebase_admin
from elasticsearch import Elasticsearch, helpers
from faker import Faker
from firebase_admin import credentials, firestore

In [3]:
# Load the service-account key and make sure the default Firebase app exists.
# initialize_app() raises ValueError when the default app is already registered,
# so re-running this cell in a live kernel would fail. Reuse the existing app
# when there is one and only initialize on the first run.
cred = credentials.Certificate('serviceAccount.json')
try:
    app = firebase_admin.get_app()
except ValueError:
    # No default app yet: this is the first execution in this kernel.
    app = firebase_admin.initialize_app(cred)
app

<firebase_admin.App at 0x1d118e47e20>

In [4]:
# Open a Firestore client and print the ids of the collections it reports.
db = firestore.client()
collections = []
for collection_ref in db.collections():
    collections.append(collection_ref.id)
print(collections)

['Blogs', 'Coaches', 'Events', 'Masterclasses', 'Snax', 'Users']


In [6]:
# Fetch all documents of each collection and keep only their field dicts
# (one list of plain dicts per collection).
master_classes = [snapshot.to_dict() for snapshot in db.collection("Masterclasses").get()]
users = [snapshot.to_dict() for snapshot in db.collection("Users").get()]
coaches = [snapshot.to_dict() for snapshot in db.collection("Coaches").get()]
events = [snapshot.to_dict() for snapshot in db.collection("Events").get()]
blogs = [snapshot.to_dict() for snapshot in db.collection("Blogs").get()]
snax = [snapshot.to_dict() for snapshot in db.collection("Snax").get()]

# Filter out Firebase fields

### Users

In [7]:
# Sanity check: number of entries in the users list.
len(users)

100

In [8]:
# Drop the fields email, firstName, lifetime, refACID, role and surname from every user.
USER_FIELDS_TO_DROP = ('email', 'firstName', 'lifetime', 'refACID', 'role', 'surname')
for user in users:
    for field_name in USER_FIELDS_TO_DROP:
        # pop with a default so a user lacking the field is left untouched
        user.pop(field_name, None)

### Coaches

In [9]:
# Drop about, blogs, books, facebook, image, instagram, link, linkedin and podcasts from every coach.
COACH_FIELDS_TO_DROP = (
    'about',
    'blogs',
    'books',
    'facebook',
    'image',
    'instagram',
    'link',
    'linkedin',
    'podcasts',
)
for coach in coaches:
    for field_name in COACH_FIELDS_TO_DROP:
        # pop with a default so a coach lacking the field is left untouched
        coach.pop(field_name, None)

### Masterclasses

In [10]:
# Drop courseLessons, courseOverview, Description, filterTags, Progress and videoURL
# from every masterclass.
for master_class in master_classes:
    for unwanted_key in (
        'courseLessons',
        'courseOverview',
        'Description',
        'filterTags',
        'Progress',
        'videoURL',
    ):
        # pop with a default so a masterclass lacking the key is left untouched
        master_class.pop(unwanted_key, None)

# Calculate additional metrics

# Write data to elastic

We recalculate the aggregate per-user metrics below before the data is written to Elasticsearch.

### Users

Total Watchtime

In [11]:
# Peek at the "Watched" list of one user to see what its entries look like.
users[1]["Watched"]

[{'Masterclass': 30, 'progress': '260 min'},
 {'Masterclass': 61, 'progress': '155 min'},
 {'Masterclass': 5, 'progress': '386 min'},
 {'Masterclass': 72, 'progress': '149 min'},
 {'Masterclass': 47, 'progress': '416 min'}]

In [12]:
# Total watchtime
# Iterate through each user and compute the total watchtime by summing the
# leading integer of the 'progress' string (e.g. '260 min') of every entry in
# the user's 'Watched' list. The result is stored under 'TotalWatchtime'.
for user in users:
    # A user with no 'Watched' field (or an empty/None value) gets a total of 0
    # instead of raising KeyError.
    watched_entries = user.get('Watched') or []
    user['TotalWatchtime'] = sum(
        int(entry['progress'].split(" ")[0]) for entry in watched_entries
    )

Courses enrolled

In [13]:
# Courses enrolled per user.
# Count the 'enroll_course' events per 'User-Id' in a single pass over the
# events, then look up each user's count. This replaces re-scanning the whole
# event list once for every user.
enrollments_by_user = Counter(
    event['User-Id'] for event in events if event['Event-Type'] == 'enroll_course'
)
for user in users:
    # Counter returns 0 for a user id that has no matching event.
    user['Courses_enrolled'] = enrollments_by_user[user['id']]

Courses completed

In [14]:
# Courses completed per user.
# Count the 'complete_course' events per 'User-Id' in a single pass over the
# events, then look up each user's count. This replaces re-scanning the whole
# event list once for every user.
completions_by_user = Counter(
    event['User-Id'] for event in events if event['Event-Type'] == 'complete_course'
)
for user in users:
    # Counter returns 0 for a user id that has no matching event.
    user['Courses_completed'] = completions_by_user[user['id']]

Courses saved

In [15]:
# Courses saved per user.
# Count the 'save_course' events per 'User-Id' in a single pass over the
# events, then look up each user's count. This replaces re-scanning the whole
# event list once for every user.
saves_by_user = Counter(
    event['User-Id'] for event in events if event['Event-Type'] == 'save_course'
)
for user in users:
    # Counter returns 0 for a user id that has no matching event.
    user['Courses_saved'] = saves_by_user[user['id']]

**TODO:** Average session duration (maybe remove)

Note: additional derived field metrics can be calculated at this point.

# Move data to elastic

In [16]:

# Connection settings are read from the environment so that no credentials are
# stored in the notebook. Set both variables before starting the kernel.
# NOTE(review): an API key was previously hardcoded in this cell; treat it as
# leaked and rotate it.

# ELASTIC_CLOUD_ID: found in the 'Manage this deployment' page
CLOUD_ID = os.environ["ELASTIC_CLOUD_ID"]
# ELASTIC_API_KEY: found in the 'Management' page under the section 'Security'
API_KEY = os.environ["ELASTIC_API_KEY"]
# Create the client instance
client = Elasticsearch(
    cloud_id=CLOUD_ID,
    api_key=API_KEY,
)
          

In [17]:
# Write Blogs, Coaches, Events, Masterclasses, Snax and Users to Elasticsearch,
# one index per collection.
# The bulk helper batches the documents instead of issuing one HTTP request per
# document, and avoids the deprecated `body=` argument of client.index().
# NOTE(review): documents are sent without an explicit _id, so re-running this
# cell presumably adds duplicates; confirm whether a stable id should be used.
documents_by_index = {
    "users": users,
    "coaches": coaches,
    "events": events,
    "masterclasses": master_classes,
    "blogs": blogs,
    "snax": snax,
}
for index_name, documents in documents_by_index.items():
    # Wrapping each dict in "_source" keeps its own keys from being read as
    # bulk metadata.
    helpers.bulk(
        client,
        ({"_index": index_name, "_source": document} for document in documents),
    )
