In [5]:
import json
import os
import pandas as pd
from pymongo import MongoClient

In [6]:
# Build "IMDB_combined_location_data.json": one record per movie id holding the
# list of its filming locations with coordinates.
#
# Inputs (both read from folder_path):
#   - IMDB_API_100_location_geoapify.json: supplies "location_name", "lat", "lon"
#   - IMDB_API_100_location.json:          supplies "id" (movie) and "location_id"
# Output:
#   - IMDB_combined_location_data.json: {movie_id: {"id": ..., "locations": [...]}}

# Define the folder path where your JSON files are located
folder_path = "Resources"

# Read the "IMDB_API_100_location_geoapify.json" data
geoapify_file_path = os.path.join(folder_path, "IMDB_API_100_location_geoapify.json")
with open(geoapify_file_path, 'r') as geoapify_file:
    geoapify_data = json.load(geoapify_file)

# Read the "IMDB_API_100_location.json" data
location_file_path = os.path.join(folder_path, "IMDB_API_100_location.json")
with open(location_file_path, 'r') as location_file:
    location_data = json.load(location_file)

# The two files are joined purely by position: entry i of one is paired with
# entry i of the other. zip() stops silently at the shorter input, so a length
# mismatch would drop entries (and signals that the positional pairing itself
# can no longer be trusted). Fail loudly instead of writing corrupt data.
if len(geoapify_data) != len(location_data):
    raise ValueError(
        f"Cannot pair entries by position: {geoapify_file_path} has "
        f"{len(geoapify_data)} entries but {location_file_path} has "
        f"{len(location_data)}"
    )

# Create a combined JSON data with the desired fields, grouped by movie id
combined_data = {}
for geoapify_entry, location_entry in zip(geoapify_data, location_data):
    movie_id = location_entry.get("id", None)

    # Create the movie's record the first time its id is seen
    movie_record = combined_data.setdefault(movie_id, {"id": movie_id, "locations": []})

    # Missing fields are stored as None rather than raising
    movie_record["locations"].append({
        "location_name": geoapify_entry.get("location_name", None),
        "lat": geoapify_entry.get("lat", None),
        "lon": geoapify_entry.get("lon", None),
        "location_id": location_entry.get("location_id", None)
    })

# Name of the combined JSON file (derived from folder_path, like the inputs)
combined_file_path = os.path.join(folder_path, "IMDB_combined_location_data.json")

# Save the combined data to a single JSON file
with open(combined_file_path, 'w') as combined_file:
    json.dump(combined_data, combined_file)


In [7]:
# Merge the movie-info and rating files into one record per movie, attach each
# movie's filming locations, and write the records out as a JSON list.

# Folder holding every input and output JSON file
folder_path = "Resources"

# One merged record per movie, keyed by movie id
movie_data = {}

# Each source file, paired with the fields to copy out of its entries.
# NOTE(review): "moiveinfo" is reproduced verbatim - presumably it matches the
# file name on disk; confirm before "correcting" the spelling.
json_files = [
    {
        "file": "IMDB_API_100_moiveinfo.json",
        "fields": ["id", "image_url", "runningTimeInMinutes", "title", "titleType", "year", "genres", "releaseDate"]
    },
    {
        "file": "IMDB_API_100_rating.json",
        "fields": ["id", "canRate", "rating", "ratingCount", "topRank", "bottomRank", "ratingsHistograms"]
    }
]

# Per-movie location lookup loaded from IMDB_combined_location_data.json
location_data_path = os.path.join(folder_path, "IMDB_combined_location_data.json")
with open(location_data_path, 'r') as location_data_file:
    location_data = json.load(location_data_file)

# Fold every source file into movie_data
for json_file_info in json_files:
    file_path = os.path.join(folder_path, json_file_info["file"])
    with open(file_path, 'r') as json_file:
        data = json.load(json_file)

    wanted_fields = json_file_info["fields"]
    for entry in data:
        movie_id = entry.get("id")

        # Fetch the movie's record, creating an empty one on first sight
        record = movie_data.setdefault(movie_id, {})

        # Copy the configured fields; absent ones are stored as None
        record.update({field: entry.get(field) for field in wanted_fields})

        # Attach locations when the lookup knows this movie id
        if movie_id in location_data:
            record["locations"] = location_data[movie_id].get("locations", [])

# Destination of the merged records
combined_file_path = 'Resources/combined_data.json'

# Persist the records as a JSON list (the id keys are dropped)
with open(combined_file_path, 'w') as combined_file:
    json.dump([*movie_data.values()], combined_file)


In [8]:
# Attach a "soundtracks" list to every movie in combined_data.json and write the
# result to IMDB_DATA_COMPLETE.JSON.

# Define the folder path where your JSON files are located
folder_path = "Resources"


def extract_artist_name(comment):
    """Return the text following the first "Performed by" in a soundtrack comment.

    Args:
        comment: The ``soundtracks_comment`` value of a soundtrack entry. May be
            ``None`` (or any non-string) when the entry has no usable comment.

    Returns:
        The stripped text between the first and second "Performed by" markers
        (or up to the end of the comment when the marker occurs once), or
        ``None`` when ``comment`` is not a string or lacks the marker.
    """
    # A key that is present but null would otherwise crash on .split()
    if not isinstance(comment, str):
        return None
    comment_parts = comment.split("Performed by")
    if len(comment_parts) > 1:
        return comment_parts[1].strip()
    return None


# Load the soundtrack data
soundtrack_file_path = os.path.join(folder_path, "IMDB_API_100_soundtrack.json")
with open(soundtrack_file_path, 'r') as soundtrack_file:
    soundtrack_data = json.load(soundtrack_file)

# Load the combined data
combined_file_path = os.path.join(folder_path, "combined_data.json")
with open(combined_file_path, 'r') as combined_file:
    movie_data = json.load(combined_file)

# Group soundtrack entries by movie id once, preserving file order, so each
# movie is a dictionary lookup instead of a scan over every soundtrack entry.
soundtracks_by_movie = {}
for soundtrack_entry in soundtrack_data:
    soundtracks_by_movie.setdefault(soundtrack_entry.get("id"), []).append(soundtrack_entry)

# Create a new dictionary to store the combined data
combined_data = {}

# Iterate through movie data
for movie_entry in movie_data:
    movie_id = movie_entry.get("id")

    # Keep only the track name and the parsed artist; the movie id is dropped.
    # Missing names become None, matching the .get() style used for other fields.
    movie_entry["soundtracks"] = [
        {
            "soundtracks_name": soundtrack_entry.get("soundtracks_name"),
            "artist_name": extract_artist_name(soundtrack_entry.get("soundtracks_comment"))
        }
        for soundtrack_entry in soundtracks_by_movie.get(movie_id, [])
    ]

    # Add the movie data to the combined dictionary using the movie_id as the key
    combined_data[movie_id] = movie_entry

# Name of the combined JSON file
combined_data_file_path = 'Resources/IMDB_DATA_COMPLETE.JSON'

# Save the combined data to a single JSON file
with open(combined_data_file_path, 'w') as IMDB_DATA_COMPLETE_file:
    json.dump(list(combined_data.values()), IMDB_DATA_COMPLETE_file, indent=4)


print(f"Combined data saved to {combined_data_file_path}")

Combined data saved to Resources/IMDB_DATA_COMPLETE.JSON


In [9]:
# Load the finished movie records into the local MongoDB collection
# IMDB_MOVIES.movies.

# MongoDB connection settings
client = MongoClient('localhost', 27017)
db = client['IMDB_MOVIES']
collection = db['movies']

# Path to the JSON file with combined data
combined_data_file_path = 'Resources/IMDB_DATA_COMPLETE.JSON'

# Load the combined data from the JSON file
with open(combined_data_file_path, 'r') as combined_data_file:
    combined_data = json.load(combined_data_file)

if combined_data:
    # Re-running this cell used to insert every movie a second time. Remove any
    # previously loaded copies of the movies about to be inserted so the cell
    # can be run repeatedly. Entries without an id are left out of the filter,
    # because matching on None would also match documents that have no "id".
    movie_ids = [movie["id"] for movie in combined_data if movie.get("id") is not None]
    if movie_ids:
        collection.delete_many({"id": {"$in": movie_ids}})

    # Insert the data into the collection.
    # NOTE: despite its name this holds the InsertManyResult; the generated ids
    # are available as inserted_ids.inserted_ids. The name is kept as-is in case
    # other cells refer to it.
    inserted_ids = collection.insert_many(combined_data)

    # Print
    print(f'Movie data loaded into MongoDB collection {collection.name}')
else:
    # insert_many() rejects an empty document list, so skip the insert entirely
    inserted_ids = None
    print(f'No movie data found in {combined_data_file_path}; nothing was inserted')


Movie data loaded into MongoDB collection movies
