In [1]:
import os
import time

import requests
from bs4 import BeautifulSoup

In [2]:
def _fetch_recipe(name_cell, headers, timeout):
    """
    Scrape the ingredient names listed on an item's detail page.

    name_cell is the table cell holding the item's name; the href of its
    first <a> tag is appended to "https://www.dofus-touch.com" and fetched.

    Returns the stripped, non-empty texts of every <span class="ak-linker">
    found inside the recipe columns. Returns an empty list when the cell has
    no link, when the request fails, or when the detail page does not answer
    with HTTP 200.
    """
    link = name_cell.find('a')
    href = link.get('href') if link is not None else None
    if not href:
        return []

    # Pause before every detail request so the site is not hammered.
    time.sleep(2)
    try:
        recipe_response = requests.get(
            "https://www.dofus-touch.com" + href, headers=headers, timeout=timeout
        )
    except requests.RequestException:
        # Best effort: a failed detail page must not lose the item itself.
        return []
    if recipe_response.status_code != 200:
        return []

    recipe_soup = BeautifulSoup(recipe_response.text, "html.parser")
    recipe = []
    # class_ is given as a full string, so it is compared against the exact class attribute value.
    for element in recipe_soup.find_all(class_='ak-column ak-container col-xs-12 col-sm-6'):
        for span in element.find_all('span', class_='ak-linker'):
            text = span.text.strip()
            if text:
                recipe.append(text)
    return recipe


def parse_object_page(url, object_n, object_list, timeout=30):
    """
    Parse one encyclopedia listing page and append its items to object_list.

    Parameters:
        url: address of the listing page to download.
        object_n: category name; "armes" and "equipements" use a different
            column layout, and "armes", "equipements" and "consommables"
            additionally get a "recipe" list scraped from each item's page.
        object_list: list extended in place with one dict per table row
            ("name", "type", "level", and "recipe" where applicable).
        timeout: seconds to wait for each HTTP request (new, defaults to 30).

    Returns:
        True when the page answered with HTTP 200 and contained the
        'ak-table' table, False otherwise.

    Items whose recipe cannot be retrieved are kept with an empty recipe
    instead of being silently dropped, and rows with too few cells are
    skipped instead of raising IndexError.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Safari/537.3'}

    response = requests.get(url, headers=headers, timeout=timeout)
    if response.status_code != 200:
        return False  # The request itself failed

    soup = BeautifulSoup(response.text, "html.parser")
    table = soup.find('table', class_='ak-table')
    if table is None:
        return False  # Nothing to parse on this page

    # Column positions depend on the category's table layout.
    nom_index = 1
    if object_n in ["armes", "equipements"]:
        type_index, niveau_index = 3, 4
    else:
        type_index, niveau_index = 2, 3
    has_recipe = object_n in ["armes", "equipements", "consommables"]

    for row in table.find_all('tr'):
        cells = row.find_all('td')
        # Skips rows without <td> cells as well as rows that are too short to index.
        if len(cells) <= niveau_index:
            continue

        object_info = {
            "name": cells[nom_index].text.strip(),
            "type": cells[type_index].text.strip(),
            "level": cells[niveau_index].text.strip(),
        }
        if has_recipe:
            object_info["recipe"] = _fetch_recipe(cells[nom_index], headers, timeout)
        object_list.append(object_info)

    return True  # The page was parsed successfully

In [3]:
base_url = "https://www.dofus-touch.com/fr/mmorpg/encyclopedie"
# One empty result list per encyclopedia category; the scraper fills them in place.
objects = {category: [] for category in ("armes", "equipements", "consommables", "ressources")}

for object_name, object_list in objects.items():
    print(f"Object : {object_name}")
    time.sleep(5)
    url = f"{base_url}/{object_name}"
    # The first page is requested without a ?page= suffix; its result is not checked.
    parse_object_page(url, object_name, object_list)
    # Walk the following pages until one is reported as unsuccessful.
    page = 2
    page_url = f"{url}?page={page}"
    success = parse_object_page(page_url, object_name, object_list)
    while success:
        page += 1
        page_url = f"{url}?page={page}"
        success = parse_object_page(page_url, object_name, object_list)
    print(f"{len(object_list)} {object_name} find")

Object : armes
729 armes find
Object : equipements
2037 equipements find
Object : consommables
1263 consommables find
Object : ressources
1842 ressources find


In [5]:
import csv

def save_object_list_to_csv(object_list, filename):
    """
    Write a list of dictionaries to a UTF-8 CSV file, one row per dictionary.

    Parameters:
        object_list: list of dicts to write. May be empty.
        filename: path of the CSV file to create (overwritten if it exists).

    The header is the union of the keys of all dictionaries, in order of
    first appearance, so a row with an extra key no longer makes the writer
    raise ValueError; keys missing from a row are written as empty fields.
    An empty object_list produces an empty file instead of raising
    IndexError.
    """
    # dict.fromkeys keeps first-seen order while removing duplicates.
    fieldnames = list(dict.fromkeys(key for obj in object_list for key in obj))

    with open(filename, mode='w', newline='', encoding='utf-8') as file:
        if not fieldnames:
            return  # Nothing to write: leave the file empty.
        writer = csv.DictWriter(file, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(object_list)

# `objects` maps each category name to its list of items; write one CSV file per category.
for object_name in objects:
    object_list = objects[object_name]
    # The file is named after the category.
    filename = f"{object_name}.csv"
    save_object_list_to_csv(object_list, filename)

In [62]:
import csv
import ast

def read_csv_file(filename, replace_apostrophes=True):
    """
    Read a CSV file into a list of dictionaries, one per row.

    Parameters:
        filename: path of the UTF-8 CSV file to read.
        replace_apostrophes: when True (the default, matching the previous
            behaviour) every apostrophe in string values, and in the string
            elements of list values, is replaced by a space. Pass False to
            keep the text exactly as stored.

    Returns:
        A list of row dictionaries keyed by the CSV header. If a row has a
        'recipe' column whose text is a Python literal (e.g. "['a', 'b']"),
        it is converted with ast.literal_eval; otherwise the raw text is
        kept. Files without a 'recipe' column are read unchanged.
    """
    data = []  # One dictionary per CSV row
    with open(filename, 'r', newline='', encoding='utf-8') as csvfile:
        for row in csv.DictReader(csvfile):
            recipe_text = row.get('recipe')
            if isinstance(recipe_text, str):
                try:
                    row['recipe'] = ast.literal_eval(recipe_text)
                except (ValueError, SyntaxError, TypeError):
                    # Not a Python literal (for example an empty field): keep the raw text.
                    pass

            if replace_apostrophes:
                # Only values are reassigned, so iterating over the row while updating it is safe.
                for key, value in row.items():
                    if isinstance(value, str):
                        row[key] = value.replace("'", " ")
                    elif isinstance(value, list):
                        row[key] = [
                            element.replace("'", " ") if isinstance(element, str) else element
                            for element in value
                        ]

            data.append(row)
    return data

# Rebuild the category -> items mapping from the per-category CSV files.
objects = {category: [] for category in ("armes", "equipements", "consommables", "ressources")}

for object_name in objects:
    object_list = objects[object_name]
    # Each category is read from the file named after it.
    object_list.extend(read_csv_file(f"{object_name}.csv"))

In [105]:
from neo4j import GraphDatabase

# Connection settings are read from the environment so that no secret is stored in the notebook.
# NOTE(review): the password that used to be hardcoded here should be considered leaked and rotated.
# URI examples: "neo4j://localhost", "neo4j+s://xxx.databases.neo4j.io"
URI = os.environ.get("NEO4J_URI", "bolt://localhost:7687")
_neo4j_password = os.environ.get("NEO4J_PASSWORD")
if not _neo4j_password:
    raise RuntimeError("Set the NEO4J_PASSWORD environment variable before running this cell.")
AUTH = (os.environ.get("NEO4J_USER", "neo4j"), _neo4j_password)

# Fail fast if the server is unreachable or the credentials are rejected.
with GraphDatabase.driver(URI, auth=AUTH) as driver:
    driver.verify_connectivity()

In [102]:
# Create one node per item, labelled with its category name.
# Property values are sent as query parameters instead of being pasted into
# the Cypher text, so names containing quotes cannot break or alter the query.
with GraphDatabase.driver(URI, auth=AUTH) as driver:
    for object_name, object_list in objects.items():
        print(f"object_name : {object_name}")

        rows = []
        for obj in object_list:
            properties = {
                "name": obj['name'],
                "type": obj['type'],
                "level": obj['level'],
            }
            # "ressources" nodes are stored without a recipe property.
            if object_name != "ressources":
                properties["recipe"] = obj.get('recipe', [])
            rows.append(properties)

        # Labels cannot be passed as parameters; object_name comes from the
        # fixed keys of `objects`, and is backtick-quoted in the query.
        # A single UNWIND query creates the whole category in one round trip.
        driver.execute_query(
            f"UNWIND $rows AS row CREATE (n:`{object_name}`) SET n = row",
            parameters_={"rows": rows},
        )

object_name : armes
object_name : equipements
object_name : consommables
object_name : ressources


In [110]:
# Link every item to the resources listed in its recipe.
with GraphDatabase.driver(URI, auth=AUTH) as driver:
    for object_name, object_list in objects.items():
        for obj in object_list:
            # Items without a recipe (missing key or empty list) have nothing to link.
            if not obj.get('recipe'):
                continue
            for resource_name in obj['recipe']:
                # The resource label must be "ressources", the category key used
                # as label when the nodes were created; the previous "resources"
                # label matched no node, so no relationship was ever created.
                # Names are passed as parameters rather than interpolated into the query.
                driver.execute_query(
                    f"MATCH (o:`{object_name}` {{name: $object_name}}), (r:ressources {{name: $resource_name}}) "
                    "CREATE (o)-[:CONTAINS]->(r)",
                    parameters_={"object_name": obj['name'], "resource_name": resource_name},
                )