In [None]:
import pandas as pd
import io
import requests

# URL of the CSV file with the company risk scores
url = "https://github.com/NataKrj/AI-project-2024/raw/main/Results%2026.12.2024/Step_4_company_risk_scores.csv"

# Fetch the content from the URL.
# - timeout: keeps the cell from hanging indefinitely on a stalled connection.
# - raise_for_status(): stops here on an HTTP error (404/500, ...) instead of
#   saving the error page to disk as if it were the CSV.
http_response = requests.get(url, timeout=30)
http_response.raise_for_status()
response = http_response.content  # raw bytes of the CSV

# Save the file locally
file_name = "Step_4_company_risk_scores.csv"
with open(file_name, "wb") as f:
    f.write(response)

# Now the file is visible in Colab's file browser
print(f"File saved as: {file_name}")

# Load the CSV into a pandas DataFrame.
# NOTE: on_bad_lines='skip' drops malformed rows without reporting them, so the
# row count of `df` can be lower than the number of lines in the file.
df = pd.read_csv(file_name, sep=',', on_bad_lines='skip', engine='python')

# Display the first few rows of the DataFrame
print(df.head())


File saved as: Step_4_company_risk_scores.csv
          company_name Sanctioned Names                       Status  \
0                   9g         No Match                        error   
1  a.b.c. kickers bvba         No Match               ENT LP Stopped   
2       a.d. froidmont         No Match                   EU Stopped   
3        a.f. security         No Match                   EU Stopped   
4        a.f.m. new sa         No Match  not found in KBO data table   

       matched_keywords  Total_Score risk_level  
0  No relevant keywords          2.0     medium  
1  No relevant keywords          5.0     medium  
2                 court         10.0     medium  
3  No relevant keywords          5.0     medium  
4  No relevant keywords          2.0     medium  


In [None]:
!pip install pymongo
!pip install dnspython



In [None]:
from pymongo import MongoClient
import json
from io import StringIO

import getpass
import os

# MongoDB connection setup.
# The connection string embeds credentials, so it must not be hardcoded in the
# notebook: it is read from the MONGODB_URI environment variable, and if that is
# not set the user is prompted for it (input is not echoed or stored in the file).
uri = os.environ.get("MONGODB_URI") or getpass.getpass("MongoDB URI (mongodb+srv://...): ")
client = MongoClient(uri)

# Test the connection
try:
    client.admin.command('ping')
    print("Successfully connected to MongoDB!")
except Exception as e:
    print(f"Error connecting to MongoDB: {e}")
    # Re-raise instead of calling exit(): in a notebook exit() terminates the
    # kernel/runtime and throws away all state, whereas raising just stops this
    # cell (and a "Run All") with a visible traceback.
    raise

# Access the database and collection
db_name = "Company_scoring"
collection_name = "Company_risk_scores"
db = client[db_name]
collection = db[collection_name]

# Path to the uploaded CSV file
file_path = 'Step_4_company_risk_scores.csv'

try:
    # Load the CSV content into a DataFrame
    df = pd.read_csv(
        file_path,
        sep=',',             # delimiter
        quotechar='"',
        on_bad_lines='skip',  # malformed rows are dropped without being reported
        engine='python'      # Python parsing engine
    )

    # Check DataFrame
    print("Preview of the imported DataFrame:")
    print(df.head())

    # Report the column layout that was parsed
    print("\nNumber of columns in DataFrame:", len(df.columns))
    print("Column Names:", df.columns.tolist())

    # Report rows with missing fields and replace the gaps with a placeholder
    if df.isnull().any().any():
        print("\nWarning: Missing values detected in the following rows:")
        print(df[df.isnull().any(axis=1)])

        # Fill missing values with "N/A" (assignment instead of inplace=True, so
        # the frame is rebound rather than mutated behind the reader's back)
        df = df.fillna("N/A")
        print("\nData after filling missing values:")
        print(df.head())
    else:
        print("\nNo missing values detected.")

    # Rename columns to the desired field names. This raises ValueError (reported
    # by the handler below) if the CSV does not have exactly six columns.
    df.columns = ["company_name", "Sanctioned Names", "Status", "matched_keywords", "Total_Score", "risk_level"]

    # Convert DataFrame to a list of JSON-compatible dicts (one per row)
    data = json.loads(df.to_json(orient='records'))

    # Preview the data to be inserted
    print("Preview of data to be inserted into MongoDB:")
    print(json.dumps(data[:5], indent=4))

    # Insert data into MongoDB.
    # NOTE: insert_many only appends; re-running this cell inserts the same
    # records again, so clear the collection first if a re-import is intended.
    if data:
        collection.insert_many(data)
        print("Data successfully imported into MongoDB!")
    else:
        print("No data found in the CSV file.")
except Exception as e:
    print(f"An error occurred: {e}")


Successfully connected to MongoDB!
Preview of the imported DataFrame:
          company_name Sanctioned Names                       Status  \
0                   9g         No Match                        error   
1  a.b.c. kickers bvba         No Match               ENT LP Stopped   
2       a.d. froidmont         No Match                   EU Stopped   
3        a.f. security         No Match                   EU Stopped   
4        a.f.m. new sa         No Match  not found in KBO data table   

       matched_keywords  Total_Score risk_level  
0  No relevant keywords          2.0     medium  
1  No relevant keywords          5.0     medium  
2                 court         10.0     medium  
3  No relevant keywords          5.0     medium  
4  No relevant keywords          2.0     medium  

Number of columns in DataFrame: 6
Column Names: ['company_name', 'Sanctioned Names', 'Status', 'matched_keywords', 'Total_Score', 'risk_level']

No missing values detected.
Preview of data to be i

# Visualization of the results (MongoDB Charts dashboard): https://charts.mongodb.com/charts-mongodb-pbuqwjz/public/dashboards/673dbfd4-c3c0-45fb-818c-bec4f2336c73