In [14]:
from elasticsearch import Elasticsearch
from dotenv import load_dotenv
import os

# Load settings through python-dotenv so the environment lookups below can find them.
load_dotenv()

# Client used by the cells below; endpoint and API key are read from the
# environment (ELASTIC_URL / ELASTIC_API_KEY) rather than hardcoded here.
client = Elasticsearch(
    os.environ.get("ELASTIC_URL"),
    api_key=os.environ.get("ELASTIC_API_KEY"),
)


In [18]:
# Search the "search-movies" index without an explicit query and keep the response.
movies = client.search(index="search-movies")

# Show only the "hits" section of the response (total, max_score, matching documents).
print(movies["hits"])

{'total': {'value': 0, 'relation': 'eq'}, 'max_score': None, 'hits': []}


In [30]:
from elasticsearch import Elasticsearch
from typing import Dict

def create_index(client: Elasticsearch, index_name: str, mapping: Dict):
    """Create an index and report whether it is available afterwards.

    Args:
        client: Client object whose ``indices.create`` method is called.
        index_name: Name of the index to create.
        mapping: Request body passed through unchanged (the callers in this
            notebook pass a dict with ``settings`` and ``mappings``).

    Returns:
        True if the index was created or already existed, False if the
        request was rejected or raised. A status message is printed in
        every case.
    """
    try:
        # ignore=400 stops the client from raising on HTTP 400. That status
        # covers more than "index already exists" (an invalid mapping is also
        # a 400), so the response body must be inspected before claiming
        # success.
        # NOTE(review): the captured notebook output shows a warning on this
        # call; newer elasticsearch-py releases may prefer
        # client.options(ignore_status=400) -- confirm against the installed version.
        response = client.indices.create(
            index=index_name,
            body=mapping,
            ignore=400,
        )
        error = response["error"] if "error" in response else None
    except Exception as e:
        print(f"Error creating index {index_name}: {str(e)}")
        return False

    if error is None:
        print(f"Created index {index_name} successfully!")
        return True

    # The error payload is normally a dict carrying a "type"; tolerate other shapes.
    error_type = error.get("type") if isinstance(error, dict) else None
    if error_type == "resource_already_exists_exception":
        # The one 400 this helper is meant to tolerate: the index is already there.
        print(f"Index {index_name} already exists, skipping creation.")
        return True

    print(f"Error creating index {index_name}: {error}")
    return False

# Example usage:
# Assuming you have your Elasticsearch client already created (e.g., 'client')

# Index definition for a review document that embeds its movie and its user.
movie_review_mapping = {
    "settings": {"number_of_shards": 1, "number_of_replicas": 0},
    "mappings": {
        "properties": {
            # Embedded movie object
            "movie": {
                "properties": {
                    "genres": {"type": "keyword"},
                    "movieId": {"type": "keyword"},
                    "title": {"type": "text"},
                },
            },
            # Fields of the review itself
            "rating": {"type": "float"},
            "timestamp": {"type": "date"},
            # Embedded user object
            "user": {
                "properties": {
                    "age": {"type": "integer"},
                    "gender": {"type": "keyword"},
                    "occupation": {"type": "keyword"},
                    "userId": {"type": "keyword"},
                },
            },
        },
    },
}

# Create the "movies_reviews" index and keep the boolean outcome for later inspection.
created_successfully = create_index(
    client=client,
    index_name="movies_reviews",
    mapping=movie_review_mapping,
)


  client.indices.create(


Created index movies_reviews successfully!


In [49]:
# Mappings
# Standalone movie documents: identifier, genres and title.
movie_index_mapping = {
    "settings": {"number_of_shards": 1, "number_of_replicas": 0},
    "mappings": {
        "properties": {
            "movieId": {"type": "keyword"},
            "genres": {"type": "keyword"},
            "title": {"type": "text"},
        },
    },
}

# Review documents: rating and timestamp plus embedded movie and user objects.
review_index_mapping = {
    "settings": {"number_of_shards": 1, "number_of_replicas": 0},
    "mappings": {
        "properties": {
            "movie": {
                "properties": {
                    "genres": {"type": "keyword"},
                    "movieId": {"type": "keyword"},
                    "title": {"type": "text"},
                },
            },
            "rating": {"type": "float"},
            "timestamp": {"type": "date"},
            "user": {
                "properties": {
                    "age": {"type": "integer"},
                    "gender": {"type": "keyword"},
                    "occupation": {"type": "keyword"},
                    "userId": {"type": "keyword"},
                },
            },
        },
    },
}

# Standalone user documents: demographics plus the user identifier.
user_index_mapping = {
    "settings": {"number_of_shards": 1, "number_of_replicas": 0},
    "mappings": {
        "properties": {
            "age": {"type": "integer"},
            "gender": {"type": "keyword"},
            "occupation": {"type": "keyword"},
            "userId": {"type": "keyword"},
        },
    },
}

# Mappings and index names are parallel lists: position k in one belongs to
# position k in the other.
list_of_mappings = [movie_index_mapping, review_index_mapping, user_index_mapping]
indexes_names = ["movies", "reviews", "users"]

# Fail loudly if the two lists drift apart instead of silently skipping an index.
assert len(indexes_names) == len(list_of_mappings), "every index name needs exactly one mapping"

# Walk the pairs directly rather than indexing both lists by position.
for index_name, index_mapping in zip(indexes_names, list_of_mappings):
    create_index(client, index_name, index_mapping)

  client.indices.create(


Created index movies successfully!
Created index reviews successfully!
Created index users successfully!


In [None]:
# Mapping for the "nested_movies_reviews" index.
# NOTE(review): despite the name, no field here declares "type": "nested";
# the definition is identical to review_index_mapping. Confirm whether nested
# fields were intended before relying on nested queries against this index.
nested_review_index_mapping = {
    "settings": {"number_of_shards": 1, "number_of_replicas": 0},
    "mappings": {
        "properties": {
            "movie": {
                "properties": {
                    "genres": {"type": "keyword"},
                    "movieId": {"type": "keyword"},
                    "title": {"type": "text"},
                },
            },
            "rating": {"type": "float"},
            "timestamp": {"type": "date"},
            "user": {
                "properties": {
                    "age": {"type": "integer"},
                    "gender": {"type": "keyword"},
                    "occupation": {"type": "keyword"},
                    "userId": {"type": "keyword"},
                },
            },
        },
    },
}

# Create the "nested_movies_reviews" index; as the cell's last expression, the result is displayed.
create_index(
    client=client,
    index_name="nested_movies_reviews",
    mapping=nested_review_index_mapping,
)


In [2]:
from pyspark.sql import SparkSession

def test_spark():
    """Smoke-test Spark: build a three-row DataFrame, display it, then stop the session.

    Any exception raised while building or showing the DataFrame is printed
    rather than propagated. The session is stopped in all cases. Returns None.
    """
    # Session creation sits outside the try block, so a failure here propagates.
    spark = SparkSession.builder.appName("SparkTest").getOrCreate()

    try:
        sample_rows = [("Alice", 1), ("Bob", 2), ("Charlie", 3)]
        column_names = ["Name", "Value"]

        # Build the DataFrame and render it as a text table in one step.
        spark.createDataFrame(sample_rows, column_names).show()

        print("Spark is up and running!")
    except Exception as err:
        print("Error occurred:", err)
    finally:
        # Always stop the session, whether or not the check succeeded.
        spark.stop()


# Run the smoke test immediately.
test_spark()

                                                                                

+-------+-----+
|   Name|Value|
+-------+-----+
|  Alice|    1|
|    Bob|    2|
|Charlie|    3|
+-------+-----+

Spark is up and running!
