In [None]:
# Imports
import pandas as pd

# Load the line-delimited JSON file (one JSON object per line) into a DataFrame
df = pd.read_json(
    path_or_buf='path/to/your/file.json',
    lines=True,
)

In [None]:
# Translation table: raw value -> replacement value
mapping = dict(
    old_value1='new_value1',
    old_value2='new_value2',
)

# Note: Series.map with a dict converts every value that is not a key of
# `mapping` to NaN, so the table has to cover all values that should survive.
df['column_name'] = df['column_name'].map(mapping)

In [None]:
# Bring the simple attribute columns in line with the target column names.
# 'connections' and 'github' map to themselves, so those columns keep their names.
df = df.rename(
    {
        'public_identifier': 'linkedInPubId',
        'profile_pic_url': 'profilePictureUrl',
        'background_cover_image_url': 'backgroundPictureUrl',
        'first_name': 'firstName',
        'last_name': 'lastName',
        'profile_headline': 'profileHeadline',
        'summary': 'profileText',
        'location': 'idLocation',
        'languages': 'idLanguages',
        'connections': 'connections',
        'github': 'github',
    },
    axis='columns',
)

def map_simple_data(json_object):
    """
    Collect the simple profile attributes of one record into a dictionary
    keyed by the target column names.

    Parameters
    ----------
    json_object : mapping
        Any object with a dict-style ``.get(key)`` method (e.g. a ``dict``).

    Returns
    -------
    dict
        The 16 target attributes, in a fixed order. An attribute whose key is
        not found in ``json_object`` is returned as ``None``.

    Notes
    -----
    ``linkedInPubId``, ``profilePictureUrl`` and ``backgroundPictureUrl`` are
    read from the raw source keys (``public_identifier``, ``profile_pic_url``,
    ``background_cover_image_url``) and fall back to the already-renamed key,
    so the function works both before and after the column rename. All other
    attributes are read under the target name only.
    """
    def first_present(*keys):
        # Value of the first key whose lookup is not None; None if all miss.
        # An explicit `is not None` test keeps falsy values such as 0 or ''.
        for key in keys:
            value = json_object.get(key)
            if value is not None:
                return value
        return None

    mapped = {
        'linkedInPubId': first_present('public_identifier', 'linkedInPubId'),
        'profilePictureUrl': first_present('profile_pic_url', 'profilePictureUrl'),
        'backgroundPictureUrl': first_present(
            'background_cover_image_url', 'backgroundPictureUrl'
        ),
    }

    # Attributes that are looked up under the same name they are returned as.
    same_name_keys = (
        'firstName',
        'lastName',
        'occupation',
        'profileHeadline',
        'profileText',
        'idLocation',
        'idLanguages',
        'connections',
        'github',
        'facebook',
        'gender',
        'birthDate',
        'industry',
    )
    mapped.update((key, json_object.get(key)) for key in same_name_keys)

    return mapped

In [None]:
import pandas as pd
from sqlalchemy import create_engine

# Extract data
def process_json(json_object, engine=None):
    """
    Split one profile JSON object into table rows and append them to the database.

    Rows are written to DIM_Location, DIM_Languages, FACT_Person and
    FACT_Qualification, in that order. The ids generated for the location,
    languages and person rows are read back with ``SELECT LAST_INSERT_ID()``
    and stored on the dependent rows.

    Missing attributes are listed as comments; complex attributes will be
    receiving their own tables and are set to None for now.

    Parameters
    ----------
    json_object : dict
        One parsed profile record. Keys that are absent are stored as None.
    engine : sqlalchemy.engine.Engine, optional
        Engine to write through. When omitted, an engine is created from the
        placeholder URL ``'your_database_url'`` and disposed before returning.

    Returns
    -------
    None

    Notes
    -----
    ``LAST_INSERT_ID()`` is MySQL-specific and its value is kept per
    connection. All inserts and id look-ups therefore run on one connection
    inside one transaction; running them through the engine's pool could read
    the id from a different connection. If any step fails, the transaction is
    rolled back.
    """
    # .get(key, default) only uses the default when the key is absent. A key
    # that is present with a null value yields None, so normalise with `or`.
    location = json_object.get('location') or {}
    languages = json_object.get('languages') or []
    qualifications = json_object.get('qualifications') or []

    person_data = {
        # ID (set by db)
        'idOrigin': None,

        'linkedInPubId': json_object.get('public_identifier'),
        'profilePictureUrl': json_object.get('profile_pic_url'),
        'backgroundPictureUrl': json_object.get('background_cover_image_url'),
        # Source keys are snake_case ('first_name' / 'last_name')
        'firstName': json_object.get('first_name'),
        'lastName': json_object.get('last_name'),
        'occupation': json_object.get('occupation'),
        'profileHeadline': json_object.get('headline'),
        'profileText': json_object.get('summary'),
        'connections': json_object.get('connections'),
        'github': json_object.get('github'),
        'facebook': json_object.get('facebook'),
        'gender': json_object.get('gender'),  # TODO: convert gender to enum M/F
        'birthDate': json_object.get('birth_date'),
        'industry': json_object.get('industry'),
        'interests': json_object.get('interests')
        # People also viewed
        # Recommendations
        # Activities
    }

    # Row for the DIM_Location table
    location_data = {
        'countryName': location.get('countryName'),
        'countryLetters': location.get('countryLetters'),
        'stateName': location.get('stateName'),
        'cityName': location.get('cityName')
    }

    # Row for the DIM_Languages table
    languages_data = {
        'sumOfSpoken': len(languages),
        'listOfLanguages': ', '.join(languages)
    }

    # Rows for the FACT_Qualification table (one per qualification)
    qualification_data = [
        {
            'idOrigin': json_object.get('idOrigin'),
            'idPerson': None,  # Filled in after the person row is inserted
            'idDuration': None,  # Placeholder for idDuration
            'type': qualification.get('type'),
            'name': qualification.get('name'),
            'idInstitution': None,  # Placeholder for idInstitution
            'description': qualification.get('description')
        }
        for qualification in qualifications
    ]

    # Create DataFrames for each table
    person_df = pd.DataFrame([person_data])
    location_df = pd.DataFrame([location_data])
    languages_df = pd.DataFrame([languages_data])
    qualification_df = pd.DataFrame(qualification_data)

    # Only dispose of the engine if it was created here
    owns_engine = engine is None
    if owns_engine:
        engine = create_engine('your_database_url')

    try:
        # One connection, one transaction: keeps LAST_INSERT_ID() tied to the
        # insert that precedes it and makes the four inserts all-or-nothing.
        with engine.begin() as conn:

            def last_insert_id():
                # Id generated by the most recent insert on this connection
                return pd.read_sql_query("SELECT LAST_INSERT_ID()", conn).iloc[0, 0]

            # Insert data into DIM_Location table
            location_df.to_sql('DIM_Location', con=conn, if_exists='append', index=False)
            person_df['idLocation'] = last_insert_id()

            # Insert data into DIM_Languages table
            languages_df.to_sql('DIM_Languages', con=conn, if_exists='append', index=False)
            person_df['idLanguages'] = last_insert_id()

            # Insert data into FACT_Person table
            person_df.to_sql('FACT_Person', con=conn, if_exists='append', index=False)
            id_person = last_insert_id()

            # Insert data into FACT_Qualification table; skipped when there are
            # no qualifications so an empty, column-less frame is never written
            if not qualification_df.empty:
                qualification_df['idPerson'] = id_person
                qualification_df.to_sql(
                    'FACT_Qualification', con=conn, if_exists='append', index=False
                )
    finally:
        if owns_engine:
            engine.dispose()