# Importing CSV files into a PostgreSQL database

## Steps

- import csv into pandas dataframe
- explore for potential inconsistencies with database schema
- clean the table data where needed, e.g. change data types
- alter database schema where needed
- import file into db

# Libraries and Modules

In [1]:
import os
import numpy as np
import pandas as pd
import psycopg2
from psycopg2 import Error
from psycopg2.extensions import register_adapter, AsIs
import datetime
import csv
from jproperties import Properties


## Data

In [2]:
def _read_pipe_csv(path):
    """Read one pipe-delimited CSV export into a DataFrame."""
    return pd.read_csv(path, sep='|')


# Every export in social_network/ uses '|' as the field separator.
Person = _read_pipe_csv("social_network/person_0_0.csv")
PersonEmail = _read_pipe_csv("social_network/person_email_emailaddress_0_0.csv")
PersonLanguage = _read_pipe_csv("social_network/person_speaks_language_0_0.csv")
Organizations = _read_pipe_csv("social_network/organisation_0_0.csv")
PersonStudyAtUniversity = _read_pipe_csv("social_network/person_studyAt_organisation_0_0.csv")
PersonWorkAtCompany = _read_pipe_csv("social_network/person_workAt_organisation_0_0.csv")
Forum = _read_pipe_csv("social_network/forum_0_0.csv")
ForumHasMember = _read_pipe_csv("social_network/forum_hasMember_person_0_0.csv")
ForumHasTag = _read_pipe_csv("social_network/forum_hasTag_tag_0_0.csv")
Post = _read_pipe_csv("social_network/post_0_0.csv")
Comment = _read_pipe_csv("social_network/comment_0_0.csv")
Comment_hastag = _read_pipe_csv("social_network/comment_hasTag_tag_0_0.csv")
Tag = _read_pipe_csv("social_network/tag_0_0.csv")
TagClass = _read_pipe_csv("social_network/tagclass_0_0.csv")
TagClass_isSubclass = _read_pipe_csv("social_network/tagclass_isSubclassOf_tagclass_0_0.csv")
PostHasTag = _read_pipe_csv("social_network/post_hasTag_tag_0_0.csv")
TagHasType = _read_pipe_csv("social_network/tag_hasType_tagclass_0_0.csv")
Place = _read_pipe_csv("social_network/place_0_0.csv")
PersonKnowsPerson = _read_pipe_csv("social_network/person_knows_person_0_0.csv")
PersonHasInterest = _read_pipe_csv("social_network/person_hasInterest_tag_0_0.csv")
PersonLikesComment = _read_pipe_csv("social_network/person_likes_comment_0_0.csv")
PersonLikesPost = _read_pipe_csv("social_network/person_likes_post_0_0.csv")

## Place (City, Country, Continent)

In [4]:
# Show column dtypes and non-null counts for Place before mapping it to the schema.
Place.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1466 entries, 0 to 1465
Data columns (total 5 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   id        1466 non-null   int64  
 1   name      1466 non-null   object 
 2   url       1466 non-null   object 
 3   type      1466 non-null   object 
 4   isPartOf  1460 non-null   float64
dtypes: float64(1), int64(1), object(3)
memory usage: 57.4+ KB


In [5]:
# Split Place into one frame per value of its 'type' column.
place_type = Place['type']
continent_subset = Place.loc[place_type == 'continent']
country_subset = Place.loc[place_type == 'country']
city_subset = Place.loc[place_type == 'city']

In [9]:
# Import to Place: copy the id and name of every Place row into the Place table.

query = """INSERT INTO Place (
    PlaceID, 
    name) 
    VALUES (%s, %s)"""

connection_params = {
    "dbname": "social_3",
    "user": "postgres",
    "password": "password",
    "host": "localhost",
    "port": "5432",
}
connection = psycopg2.connect(**connection_params)

with connection.cursor() as cursor:
    for _, place_row in Place.iterrows():
        # One INSERT per row, committed immediately.
        cursor.execute(query, (place_row['id'], place_row['name']))
        connection.commit()

# Done with the database for this cell.
connection.close()

In [10]:
# Continent: copy the id and name of every continent row into the Continent table.
query = """INSERT INTO Continent (
    ContinentID, 
    name) 
    VALUES (%s, %s)"""

connection_params = {
    "dbname": "social_3",
    "user": "postgres",
    "password": "password",
    "host": "localhost",
    "port": "5432",
}
connection = psycopg2.connect(**connection_params)

with connection.cursor() as cursor:
    for _, continent_row in continent_subset.iterrows():
        # One INSERT per row, committed immediately.
        cursor.execute(query, (continent_row['id'], continent_row['name']))
        connection.commit()

# Done with the database for this cell.
connection.close()

In [None]:
# Country: copy id, name and parent place id of every country row into the
# Country table.
query = """INSERT INTO Country (
    CountryID, 
    name,
    IsPartOf) 
    VALUES (%s, %s, %s)"""


connection = psycopg2.connect(
        dbname="social_3",
        user="postgres",
        password="password",
        host="localhost",
        port="5432"
    )

try:
    with connection.cursor() as cursor:

        for index, row in country_subset.iterrows():

            # Extracting values from dataframe
            CountryID_value = row['id']
            name_value = row['name']
            # The DataFrame column is spelled 'isPartOf' (lower-case i), as
            # listed by Place.info(); looking up 'IsPartOf' raises KeyError.
            # Only the SQL column name in the query is capitalised.
            IsPartOf_value = row['isPartOf']

            # Execute query; commit per row, like the sibling insert cells.
            cursor.execute(query, (CountryID_value, name_value, IsPartOf_value))
            connection.commit()
finally:
    # Close the database connection even when an insert fails, so a failed
    # run does not leave an open session behind.
    connection.close()

In [30]:
# City: copy id, name and parent place id of every city row into the City table.
query = """INSERT INTO City (
    CityID, 
    name,
    IsPartOf) 
    VALUES (%s, %s, %s)"""

connection_params = {
    "dbname": "social_3",
    "user": "postgres",
    "password": "password",
    "host": "localhost",
    "port": "5432",
}
connection = psycopg2.connect(**connection_params)

with connection.cursor() as cursor:
    for _, city_row in city_subset.iterrows():
        # Values are passed in the same order as the INSERT column list.
        city_values = (city_row['id'], city_row['name'], city_row['isPartOf'])
        cursor.execute(query, city_values)
        connection.commit()

# Done with the database for this cell.
connection.close()

## Person Table

In [31]:
# Show column dtypes and non-null counts for Person.
Person.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 88 entries, 0 to 87
Data columns (total 9 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   id            88 non-null     int64 
 1   firstName     88 non-null     object
 2   lastName      88 non-null     object
 3   gender        88 non-null     object
 4   birthday      88 non-null     object
 5   creationDate  88 non-null     object
 6   locationIP    88 non-null     object
 7   browserUsed   88 non-null     object
 8   place         88 non-null     int64 
dtypes: int64(2), object(7)
memory usage: 6.3+ KB


In [33]:
# Change datatypes: parse both date columns into datetimes.
# errors='coerce' replaces values that cannot be parsed with NaT instead of raising.
for date_column in ('creationDate', 'birthday'):
    Person[date_column] = pd.to_datetime(Person[date_column], errors='coerce')


In [36]:
# Insert data: copy every Person row into the Person table.

query = """INSERT INTO Person (
    PersonID, 
    CreationDate, 
    FirstName, 
    LastName, 
    Gender, 
    Birthday, 
    BrowserUsed, 
    LocationIP,
    Place) 
    VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)"""

# Connection settings -- replace these with your own database details.
connection_params = {
    "dbname": "social_3",
    "user": "postgres",
    "password": "password",
    "host": "localhost",
    "port": "5432",
}
connection = psycopg2.connect(**connection_params)

with connection.cursor() as cursor:
    for _, person in Person.iterrows():
        # Values are listed in the same order as the INSERT column list.
        person_values = (
            person['id'],
            person['creationDate'],
            person['firstName'],
            person['lastName'],
            person['gender'],
            person['birthday'],
            person['browserUsed'],
            person['locationIP'],
            person['place'],
        )
        cursor.execute(query, person_values)
        connection.commit()

# Done with the database for this cell.
connection.close()

## PersonEmail

In [30]:
# Show column dtypes and non-null counts for PersonEmail.
PersonEmail.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 152 entries, 0 to 151
Data columns (total 2 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   Person.id  152 non-null    int64 
 1   email      152 non-null    object
dtypes: int64(1), object(1)
memory usage: 2.5+ KB


In [37]:
# Insert data: copy every (person id, email) pair into PersonEmail.

query = """INSERT INTO PersonEmail (
    PersonID, 
    Email) 
    VALUES (%s, %s)"""

connection_params = {
    "dbname": "social_3",
    "user": "postgres",
    "password": "password",
    "host": "localhost",
    "port": "5432",
}
connection = psycopg2.connect(**connection_params)

with connection.cursor() as cursor:
    for _, email_row in PersonEmail.iterrows():
        # One INSERT per row, committed immediately.
        cursor.execute(query, (email_row['Person.id'], email_row['email']))
        connection.commit()

connection.close()

## Language and PersonLanguage

In [38]:
# Show column dtypes and non-null counts for PersonLanguage.
PersonLanguage.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 184 entries, 0 to 183
Data columns (total 2 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   Person.id  184 non-null    int64 
 1   language   184 non-null    object
dtypes: int64(1), object(1)
memory usage: 3.0+ KB


In [39]:
# Copy every (person id, language) pair into PersonLanguage.
query = """
INSERT INTO PersonLanguage (
    PersonID,
    Language
) VALUES (%s, %s)

"""

connection_params = {
    "dbname": "social_3",
    "user": "postgres",
    "password": "password",
    "host": "localhost",
    "port": "5432",
}
connection = psycopg2.connect(**connection_params)

with connection.cursor() as cursor:
    for _, language_row in PersonLanguage.iterrows():
        # One INSERT per row, committed immediately.
        cursor.execute(query, (language_row['Person.id'], language_row['language']))
        connection.commit()

connection.close()

## PersonKnowsPerson

In [40]:
# Show column dtypes and non-null counts for PersonKnowsPerson.
PersonKnowsPerson.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 224 entries, 0 to 223
Data columns (total 3 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   Person.id     224 non-null    int64 
 1   Person.id.1   224 non-null    int64 
 2   creationDate  224 non-null    object
dtypes: int64(2), object(1)
memory usage: 5.4+ KB


In [41]:
# Parse creationDate into datetimes; errors='coerce' turns unparseable values into NaT.
PersonKnowsPerson['creationDate'] = pd.to_datetime(PersonKnowsPerson['creationDate'], errors='coerce')

In [42]:
# Insert into PersonKnowsPerson: one row per (person A, person B, creation date).
query = """INSERT INTO PersonKnowsPerson (
    PersonID_A, 
    PersonID_B,
    CreationDate) 
    VALUES (%s, %s, %s)"""

connection_params = {
    "dbname": "social_3",
    "user": "postgres",
    "password": "password",
    "host": "localhost",
    "port": "5432",
}
connection = psycopg2.connect(**connection_params)

with connection.cursor() as cursor:
    for _, knows_row in PersonKnowsPerson.iterrows():
        # 'Person.id.1' is the second of the two 'Person.id' columns in this frame.
        knows_values = (
            knows_row['Person.id'],
            knows_row['Person.id.1'],
            knows_row['creationDate'],
        )
        cursor.execute(query, knows_values)
        connection.commit()

# Done with the database for this cell.
connection.close()

## University/Company

In [38]:
# Preview the first rows of Organizations.
Organizations.head()

Unnamed: 0,id,type,name,url,place
0,0,company,Kam_Air,http://dbpedia.org/resource/Kam_Air,59
1,1,company,Balkh_Airlines,http://dbpedia.org/resource/Balkh_Airlines,59
2,2,company,Khyber_Afghan_Airlines,http://dbpedia.org/resource/Khyber_Afghan_Airl...,59
3,3,company,MarcoPolo_Airways,http://dbpedia.org/resource/MarcoPolo_Airways,59
4,4,company,Pamir_Airways,http://dbpedia.org/resource/Pamir_Airways,59


In [None]:
"""
UniversityID INT PRIMARY KEY,
    Name VARCHAR(255) NOT NULL,
	URL VARCHAR(255),
	Place INT,
	FOREIGN KEY (Place) REFERENCES Place(PlaceID)
);

"""

In [43]:
# Boolean masks selecting each organisation type.
company_filter = Organizations['type'].eq('company')
university_filter = Organizations['type'].eq('university')

# Apply the masks to get one frame per organisation type.
company = Organizations[company_filter]
university = Organizations[university_filter]

In [46]:
# Insert company data: copy every company row into the Company table.

query = """INSERT INTO Company (
    CompanyID, 
    Name,
    URL, 
    Place) 
    VALUES (%s, %s, %s, %s)"""

connection_params = {
    "dbname": "social_3",
    "user": "postgres",
    "password": "password",
    "host": "localhost",
    "port": "5432",
}
connection = psycopg2.connect(**connection_params)

with connection.cursor() as cursor:
    for _, company_row in company.iterrows():
        # Values are listed in the same order as the INSERT column list.
        company_values = (
            company_row['id'],
            company_row['name'],
            company_row['url'],
            company_row['place'],
        )
        cursor.execute(query, company_values)
        connection.commit()

# Done with the database for this cell.
connection.close()

In [48]:
# Insert University data: copy every university row into the University table.

query = """INSERT INTO University (
    UniversityID, 
    Name,
    URL, 
    Place) 
    VALUES (%s, %s, %s, %s)"""

connection_params = {
    "dbname": "social_3",
    "user": "postgres",
    "password": "password",
    "host": "localhost",
    "port": "5432",
}
connection = psycopg2.connect(**connection_params)

with connection.cursor() as cursor:
    for _, university_row in university.iterrows():
        # Values are listed in the same order as the INSERT column list.
        university_values = (
            university_row['id'],
            university_row['name'],
            university_row['url'],
            university_row['place'],
        )
        cursor.execute(query, university_values)
        connection.commit()

# Done with the database for this cell.
connection.close()

## PersonStudyAtUniversity

In [53]:
# Preview the first rows of PersonStudyAtUniversity.
PersonStudyAtUniversity.head()

Unnamed: 0,Person.id,Organisation.id,classYear
0,2199023255625,2207,2002
1,16492674416673,1596,2002
2,2199023255611,2207,2001
3,13194139533352,1856,2007
4,3298534883391,2833,2005


In [54]:
# Show column dtypes and non-null counts for PersonStudyAtUniversity.
PersonStudyAtUniversity.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 71 entries, 0 to 70
Data columns (total 3 columns):
 #   Column           Non-Null Count  Dtype
---  ------           --------------  -----
 0   Person.id        71 non-null     int64
 1   Organisation.id  71 non-null     int64
 2   classYear        71 non-null     int64
dtypes: int64(3)
memory usage: 1.8 KB


In [49]:
# Insert PersonStudyAtUniversity: one row per (person, university, class year).

# Let psycopg2 pass numpy int64 values through to SQL as-is.
register_adapter(np.int64, AsIs)

query = """INSERT INTO PersonStudyAtUniversity (
    PersonID, 
    UniversityID,
    ClassYear) 
    VALUES (%s, %s, %s)"""

connection_params = {
    "dbname": "social_3",
    "user": "postgres",
    "password": "password",
    "host": "localhost",
    "port": "5432",
}
connection = psycopg2.connect(**connection_params)

with connection.cursor() as cursor:
    for _, study_row in PersonStudyAtUniversity.iterrows():
        # 'Organisation.id' holds the university id in this frame.
        study_values = (
            study_row['Person.id'],
            study_row['Organisation.id'],
            study_row['classYear'],
        )
        cursor.execute(query, study_values)
        connection.commit()

# Done with the database for this cell.
connection.close()

## PersonWorkAtCompany

In [7]:
# Preview the first rows of PersonWorkAtCompany.
PersonWorkAtCompany.head()

Unnamed: 0,Person.id,Organisation.id,workFrom
0,12094627905604,897,2009
1,12094627905604,887,2008
2,12094627905604,888,2012
3,12094627905604,890,2011
4,12094627905604,893,2009


In [50]:
# Copy every (person, company, start year) row into PersonWorkAtCompany.

# The previewed columns of this frame are all integers, so iterrows() is
# expected to hand back numpy int64 values, which psycopg2 cannot adapt by
# default. Register the adapter here, as the other all-integer insert cells do,
# so this cell does not depend on another cell having been run first.
register_adapter(np.int64, AsIs)

query = """INSERT INTO PersonWorkAtCompany (
    PersonID, 
    CompanyID,
    WorkFrom) 
    VALUES (%s, %s, %s)"""


connection = psycopg2.connect(
        dbname="social_3",
        user="postgres",
        password="password",
        host="localhost",
        port="5432"
    )

try:
    with connection.cursor() as cursor:

        # Iterate over rows in the DataFrame and insert data into the table
        for index, row in PersonWorkAtCompany.iterrows():
            # Extract values from the DataFrame
            PersonID_value = row['Person.id']
            CompanyID_value = row['Organisation.id']
            WorkFrom_value = row['workFrom']

            # Execute the SQL query; commit per row, like the sibling cells.
            cursor.execute(query, (PersonID_value,
                                   CompanyID_value,
                                   WorkFrom_value))
            connection.commit()
finally:
    # Close the database connection even when an insert fails.
    connection.close()

## Forum

In [51]:

# Show column dtypes and non-null counts for Forum.
Forum.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 799 entries, 0 to 798
Data columns (total 4 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   id            799 non-null    int64 
 1   title         799 non-null    object
 2   creationDate  799 non-null    object
 3   moderator     799 non-null    int64 
dtypes: int64(2), object(2)
memory usage: 25.1+ KB


In [52]:
# Parse creationDate into datetimes; errors='coerce' turns unparseable values into NaT.
Forum['creationDate'] = pd.to_datetime(Forum['creationDate'], errors='coerce')

In [53]:
# Copy every Forum row (id, title, creation date, moderator) into the Forum table.
query = """INSERT INTO Forum (
    ForumID, 
    Title,
    CreateDate,
    Moderator) 
    VALUES (%s, %s, %s, %s)"""

connection_params = {
    "dbname": "social_3",
    "user": "postgres",
    "password": "password",
    "host": "localhost",
    "port": "5432",
}
connection = psycopg2.connect(**connection_params)

with connection.cursor() as cursor:
    for _, forum_row in Forum.iterrows():
        # Values are listed in the same order as the INSERT column list.
        forum_values = (
            forum_row['id'],
            forum_row['title'],
            forum_row['creationDate'],
            forum_row['moderator'],
        )
        cursor.execute(query, forum_values)
        connection.commit()

# Done with the database for this cell.
connection.close()

## Forum Has Member 

In [54]:
# Preview the first rows of ForumHasMember.
ForumHasMember.head()

Unnamed: 0,Forum.id,Person.id,joinDate
0,17179869197,9895604649984,2011-09-15T05:28:33.616+0000
1,17179869197,8796093022217,2011-07-26T04:00:02.456+0000
2,17179869197,15393162788888,2012-07-11T04:36:42.044+0000
3,17179869197,9895604650020,2011-10-18T05:30:38.405+0000
4,17179869197,16492674416689,2012-09-11T04:07:25.731+0000


In [55]:
# Show column dtypes and non-null counts for ForumHasMember.
ForumHasMember.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1782 entries, 0 to 1781
Data columns (total 3 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   Forum.id   1782 non-null   int64 
 1   Person.id  1782 non-null   int64 
 2   joinDate   1782 non-null   object
dtypes: int64(2), object(1)
memory usage: 41.9+ KB


In [56]:
# Parse joinDate into datetimes; errors='coerce' turns unparseable values into NaT.
ForumHasMember['joinDate'] = pd.to_datetime(ForumHasMember['joinDate'], errors='coerce')

In [60]:
from psycopg2 import Error

# Copy every (forum, person, join date) row into ForumHasMember. Rows the
# database rejects are reported and skipped; the remaining rows are still loaded.
query = """INSERT INTO ForumHasMember (
    ForumID, 
    PersonID,
    JoinDate) 
    VALUES (%s, %s, %s)"""

# Connection parameters
connection_params = {
    "dbname": "social_3",
    "user": "postgres",
    "password": "password",
    "host": "localhost",
    "port": "5432"
}

# Bind the name before the try block so the finally clause never sees an
# unbound (or stale, already-closed) connection when connect() itself fails.
connection = None

try:
    # Connect to the database
    connection = psycopg2.connect(**connection_params)

    # Create cursor
    with connection.cursor() as cursor:
        # Iterate over rows in the DataFrame and insert data into the table
        for index, row in ForumHasMember.iterrows():
            # Extract values from the DataFrame. Each SQL column takes the
            # CSV column of the same meaning: ForumID <- 'Forum.id',
            # PersonID <- 'Person.id' (these two were previously swapped).
            ForumID_value = row['Forum.id']
            PersonID_value = row['Person.id']
            JoinDate_value = row['joinDate']

            try:
                # Execute the insert query
                cursor.execute(query, (ForumID_value, PersonID_value, JoinDate_value))
                # Commit the transaction
                connection.commit()
            except psycopg2.errors.ForeignKeyViolation as e:
                # Skip conflicting data and print the error message
                print(f"Skipping conflicting data. Error: {e}")
                # A failed statement leaves the transaction aborted; roll back
                # so the next row's INSERT is not rejected as well.
                connection.rollback()
            except Error as e:
                # Handle other database errors
                print(f"Error executing query: {e}")
                connection.rollback()  # Rollback the transaction

except psycopg2.Error as error:
    # Handle connection errors
    print("Error connecting to PostgreSQL:", error)
finally:
    # Close the database connection
    if connection is not None:
        connection.close()


## Post

In [61]:
# Show column dtypes and non-null counts for Post.
Post.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7213 entries, 0 to 7212
Data columns (total 11 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   id            7213 non-null   int64 
 1   imageFile     7001 non-null   object
 2   creationDate  7213 non-null   object
 3   locationIP    7213 non-null   object
 4   browserUsed   7213 non-null   object
 5   language      212 non-null    object
 6   content       212 non-null    object
 7   length        7213 non-null   int64 
 8   creator       7213 non-null   int64 
 9   Forum.id      7213 non-null   int64 
 10  place         7213 non-null   int64 
dtypes: int64(5), object(6)
memory usage: 620.0+ KB


In [63]:
# Parse creationDate into datetimes; errors='coerce' turns unparseable values into NaT.
Post['creationDate'] = pd.to_datetime(Post['creationDate'], errors='coerce')

In [64]:
# Copy every Post row into the Post table, storing missing values as SQL NULL.
query = """INSERT INTO Post (
    PostID,
    ImageFile,
    CreationDate,
    LocationIP,
    BrowserUsed,
    Language,
    Content,
    Length,
    Creator,
    ForumID,
    Place) 
    VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"""

# DataFrame columns in the same order as the INSERT column list above.
post_columns = [
    'id',
    'imageFile',
    'creationDate',
    'locationIP',
    'browserUsed',
    'language',
    'content',
    'length',
    'creator',
    'Forum.id',
    'place',
]


def _none_if_missing(value):
    """Return None for a missing value (NaN/NaT/None), else the value unchanged."""
    return None if pd.isna(value) else value


connection = psycopg2.connect(
        dbname="social_3",
        user="postgres",
        password="password",
        host="localhost",
        port="5432"
    )

try:
    with connection.cursor() as cursor:

        for index, row in Post.iterrows():
            # Post.info() shows imageFile, language and content are only
            # partly populated; pandas represents the gaps as NaN. Passed
            # through unchanged they would be sent as a float NaN rather than
            # NULL, so map every missing value to None (sent as SQL NULL).
            post_values = tuple(_none_if_missing(row[column]) for column in post_columns)

            cursor.execute(query, post_values)
            connection.commit()
finally:
    # Close the database connection even when an insert fails.
    connection.close()

## Comment

In [66]:
# Show column dtypes and non-null counts for Comment.
Comment.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1987 entries, 0 to 1986
Data columns (total 10 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   id               1987 non-null   int64  
 1   creationDate     1987 non-null   object 
 2   locationIP       1987 non-null   object 
 3   browserUsed      1987 non-null   object 
 4   content          1987 non-null   object 
 5   length           1987 non-null   int64  
 6   creator          1987 non-null   int64  
 7   place            1987 non-null   int64  
 8   replyOfPost      1034 non-null   float64
 9   replyOfComment;  1982 non-null   object 
dtypes: float64(1), int64(4), object(5)
memory usage: 155.4+ KB


In [81]:
# Parse creationDate into datetimes; errors='coerce' turns unparseable values into NaT.
Comment['creationDate'] = pd.to_datetime(Comment['creationDate'], errors='coerce')

In [84]:
# Copy every Comment row into the Comment table.
# The two replyOf* columns of the frame are not part of this insert.
query = """INSERT INTO Comment (
    CommentID,
    CreationDate,
    LocationIP,
    BrowserUsed,
    Content,
    Length,
    Creator,
    Place) 
    VALUES (%s, %s, %s, %s, %s, %s, %s, %s)"""

connection_params = {
    "dbname": "social_3",
    "user": "postgres",
    "password": "password",
    "host": "localhost",
    "port": "5432",
}
connection = psycopg2.connect(**connection_params)

with connection.cursor() as cursor:
    for _, comment_row in Comment.iterrows():
        # Values are listed in the same order as the INSERT column list.
        comment_values = (
            comment_row['id'],
            comment_row['creationDate'],
            comment_row['locationIP'],
            comment_row['browserUsed'],
            comment_row['content'],
            comment_row['length'],
            comment_row['creator'],
            comment_row['place'],
        )
        cursor.execute(query, comment_values)
        connection.commit()

# Done with the database for this cell.
connection.close()

## Tag Table

In [85]:
# Preview the first rows of Tag.
Tag.head()

Unnamed: 0,id,name,url
0,0,Hamid_Karzai,http://dbpedia.org/resource/Hamid_Karzai
1,1,Rumi,http://dbpedia.org/resource/Rumi
2,2,Mahmud_of_Ghazni,http://dbpedia.org/resource/Mahmud_of_Ghazni
3,3,Abbas_I_of_Persia,http://dbpedia.org/resource/Abbas_I_of_Persia
4,4,Humayun,http://dbpedia.org/resource/Humayun


In [86]:
# Cast every tag name to str before inserting.
Tag['name'] = Tag['name'].astype(str)

In [87]:

# Copy every Tag row (id, name, url) into the Tag table.
query = """INSERT INTO Tag (
    TagID,
    name,
    url) 
    VALUES (%s, %s, %s)"""

connection_params = {
    "dbname": "social_3",
    "user": "postgres",
    "password": "password",
    "host": "localhost",
    "port": "5432",
}
connection = psycopg2.connect(**connection_params)

with connection.cursor() as cursor:
    for _, tag_row in Tag.iterrows():
        # One INSERT per row, committed immediately.
        cursor.execute(query, (tag_row['id'], tag_row['name'], tag_row['url']))
        connection.commit()

# Done with the database for this cell.
connection.close()

## Tag Class Table

In [88]:
# Preview the first rows of TagClass.
TagClass.head()

Unnamed: 0,id,name,url
0,349,OfficeHolder,http://dbpedia.org/ontology/OfficeHolder
1,211,Person,http://dbpedia.org/ontology/Person
2,239,Agent,http://dbpedia.org/ontology/Agent
3,0,Thing,http://www.w3.org/2002/07/owl#Thing
4,98,Monarch,http://dbpedia.org/ontology/Monarch


In [89]:
# Show column dtypes and non-null counts for TagClass.
TagClass.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 71 entries, 0 to 70
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   id      71 non-null     int64 
 1   name    71 non-null     object
 2   url     71 non-null     object
dtypes: int64(1), object(2)
memory usage: 1.8+ KB


In [90]:

# Copy every TagClass row (id, name, url) into the TagClass table.
query = """INSERT INTO TagClass (
    TagClassID,
    name,
    url) 
    VALUES (%s, %s, %s)"""

connection_params = {
    "dbname": "social_3",
    "user": "postgres",
    "password": "password",
    "host": "localhost",
    "port": "5432",
}
connection = psycopg2.connect(**connection_params)

with connection.cursor() as cursor:
    for _, tag_class_row in TagClass.iterrows():
        # One INSERT per row, committed immediately.
        tag_class_values = (tag_class_row['id'], tag_class_row['name'], tag_class_row['url'])
        cursor.execute(query, tag_class_values)
        connection.commit()

# Done with the database for this cell.
connection.close()

## Post Has Tag

In [91]:
# Preview the first rows of PostHasTag.
PostHasTag.head()

Unnamed: 0,Post.id,Tag.id
0,68719476859,6
1,68719476859,1199
2,128849019004,6
3,68719476861,6
4,111669149822,6


In [93]:
# Copy every (post id, tag id) pair into PostHasTag.

# Both previewed columns of this frame are integers, so iterrows() is expected
# to hand back numpy int64 values, which psycopg2 cannot adapt by default.
# Register the adapter here, as the other all-integer insert cells do, so this
# cell does not depend on another cell having been run first.
register_adapter(np.int64, AsIs)

query = """INSERT INTO PostHasTag (
    PostID,
    TagID) 
    VALUES (%s, %s)"""


connection = psycopg2.connect(
        dbname="social_3",
        user="postgres",
        password="password",
        host="localhost",
        port="5432"
    )

try:
    with connection.cursor() as cursor:

        # Iterate over rows in the DataFrame and insert data into the table
        for index, row in PostHasTag.iterrows():
            # Extract values from the DataFrame
            postid_value = row['Post.id']
            tagid_value = row['Tag.id']

            # Commit per row, like the sibling insert cells.
            cursor.execute(query, (postid_value, tagid_value))
            connection.commit()
finally:
    # Close the database connection even when an insert fails.
    connection.close()

## Tag Has Type Tagclass

In [94]:
# Preview the first rows of TagHasType.
TagHasType.head()

Unnamed: 0,Tag.id,TagClass.id
0,0,349
1,1,211
2,2,98
3,3,336
4,4,336


In [95]:
# Force both id columns to int64.
for id_column in ('Tag.id', 'TagClass.id'):
    TagHasType[id_column] = TagHasType[id_column].astype('int64')

In [96]:
# Show column dtypes and non-null counts for TagHasType after the casts.
TagHasType.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 16080 entries, 0 to 16079
Data columns (total 2 columns):
 #   Column       Non-Null Count  Dtype
---  ------       --------------  -----
 0   Tag.id       16080 non-null  int64
 1   TagClass.id  16080 non-null  int64
dtypes: int64(2)
memory usage: 251.4 KB


In [97]:
# Copy every (tag id, tag class id) pair into Tag_HasType_TagClass.

# Let psycopg2 pass numpy int64 values through to SQL as-is.
register_adapter(np.int64, AsIs)

query = """INSERT INTO Tag_HasType_TagClass (
    TagID,
    TagClassID) 
    VALUES (%s, %s)"""

connection_params = {
    "dbname": "social_3",
    "user": "postgres",
    "password": "password",
    "host": "localhost",
    "port": "5432",
}
connection = psycopg2.connect(**connection_params)

with connection.cursor() as cursor:
    for _, type_row in TagHasType.iterrows():
        # One INSERT per row, committed immediately.
        cursor.execute(query, (type_row['Tag.id'], type_row['TagClass.id']))
        connection.commit()

# Done with the database for this cell.
connection.close()

## Tagclass_is_subclass

In [99]:
# Show column dtypes and non-null counts for TagClass_isSubclass.
TagClass_isSubclass.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 70 entries, 0 to 69
Data columns (total 2 columns):
 #   Column         Non-Null Count  Dtype
---  ------         --------------  -----
 0   TagClass.id    70 non-null     int64
 1   TagClass.id.1  70 non-null     int64
dtypes: int64(2)
memory usage: 1.2 KB


In [100]:
# Copy every (tag class, tag class) pair into TagClass_issubclass_Tagclass.

# Let psycopg2 pass numpy int64 values through to SQL as-is.
register_adapter(np.int64, AsIs)

query = """INSERT INTO TagClass_issubclass_Tagclass (
    TagClassID_A,
    TagClassID_B) 
    VALUES (%s, %s)"""

connection_params = {
    "dbname": "social_3",
    "user": "postgres",
    "password": "password",
    "host": "localhost",
    "port": "5432",
}
connection = psycopg2.connect(**connection_params)

with connection.cursor() as cursor:
    for _, subclass_row in TagClass_isSubclass.iterrows():
        # 'TagClass.id.1' is the second of the two 'TagClass.id' columns in this frame.
        subclass_values = (subclass_row['TagClass.id'], subclass_row['TagClass.id.1'])
        cursor.execute(query, subclass_values)
        connection.commit()

# Done with the database for this cell.
connection.close()

## Comment Has Tag

In [101]:
# Preview the first rows of Comment_hastag.
Comment_hastag.head()

Unnamed: 0,Comment.id,Tag.id
0,68719476865,2092
1,68719476867,2808
2,68719476868,291
3,68719476870,2036
4,68719476872,543


In [102]:
# Copy every (comment id, tag id) pair into CommentHasTag.

# Let psycopg2 pass numpy int64 values through to SQL as-is.
register_adapter(np.int64, AsIs)

query = """INSERT INTO CommentHasTag (
    CommentID,
    TagID) 
    VALUES (%s, %s)"""

connection_params = {
    "dbname": "social_3",
    "user": "postgres",
    "password": "password",
    "host": "localhost",
    "port": "5432",
}
connection = psycopg2.connect(**connection_params)

with connection.cursor() as cursor:
    for _, comment_tag_row in Comment_hastag.iterrows():
        # One INSERT per row, committed immediately.
        cursor.execute(query, (comment_tag_row['Comment.id'], comment_tag_row['Tag.id']))
        connection.commit()

# Done with the database for this cell.
connection.close()

## Person Likes Post

In [103]:
# Print column names, non-null counts and dtypes; used to spot columns whose
# dtype needs converting before the insert.
PersonLikesPost.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 765 entries, 0 to 764
Data columns (total 3 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   Person.id     765 non-null    int64 
 1   Post.id       765 non-null    int64 
 2   creationDate  765 non-null    object
dtypes: int64(2), object(1)
memory usage: 18.1+ KB


In [104]:
# Parse the creationDate strings into pandas datetimes. errors='coerce' turns
# any value that cannot be parsed into NaT instead of raising.
PersonLikesPost['creationDate'] = pd.to_datetime(PersonLikesPost['creationDate'], errors='coerce')

In [105]:
query = """INSERT INTO PersonLikesPost (
    PersonID,
    PostID,
    CreationDate) 
    VALUES (%s, %s, %s)"""

# One parameter tuple per DataFrame row. A creationDate that is NaT (e.g. a
# value that failed to parse with errors='coerce') is sent as SQL NULL,
# because a NaT would otherwise reach PostgreSQL as an invalid timestamp
# literal.
# NOTE(review): assumes the CreationDate column accepts NULL - confirm
# against the schema.
rows = [
    (person_id, post_id, None if pd.isna(created) else created)
    for person_id, post_id, created in PersonLikesPost[
        ['Person.id', 'Post.id', 'creationDate']
    ].itertuples(index=False, name=None)
]

# NOTE(review): credentials are hard-coded here; consider loading them from
# configuration instead.
connection = psycopg2.connect(
    dbname="social_3",
    user="postgres",
    password="password",
    host="localhost",
    port="5432",
)

# Import all rows in a single transaction: one commit at the end instead of
# one per row, so a failing insert leaves the table untouched.
try:
    with connection.cursor() as cursor:
        cursor.executemany(query, rows)
    connection.commit()
finally:
    # Close the database connection even if an insert raised; closing without
    # a commit discards the pending transaction.
    connection.close()

## Person Likes Comment

In [106]:
# Print column names, non-null counts and dtypes; used to spot columns whose
# dtype needs converting before the insert.
PersonLikesComment.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 374 entries, 0 to 373
Data columns (total 3 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   Person.id     374 non-null    int64 
 1   Comment.id    374 non-null    int64 
 2   creationDate  374 non-null    object
dtypes: int64(2), object(1)
memory usage: 8.9+ KB


In [107]:
# Parse the creationDate strings into pandas datetimes. errors='coerce' turns
# any value that cannot be parsed into NaT instead of raising.
PersonLikesComment['creationDate'] = pd.to_datetime(PersonLikesComment['creationDate'], errors='coerce')

In [111]:
query = """INSERT INTO PersonLikesComment (
    PersonID,
    CommentID,
    CreationDate) 
    VALUES (%s, %s, %s)"""

# One parameter tuple per DataFrame row. A creationDate that is NaT (e.g. a
# value that failed to parse with errors='coerce') is sent as SQL NULL,
# because a NaT would otherwise reach PostgreSQL as an invalid timestamp
# literal.
# NOTE(review): assumes the CreationDate column accepts NULL - confirm
# against the schema.
rows = [
    (person_id, comment_id, None if pd.isna(created) else created)
    for person_id, comment_id, created in PersonLikesComment[
        ['Person.id', 'Comment.id', 'creationDate']
    ].itertuples(index=False, name=None)
]

# NOTE(review): credentials are hard-coded here; consider loading them from
# configuration instead.
connection = psycopg2.connect(
    dbname="social_3",
    user="postgres",
    password="password",
    host="localhost",
    port="5432",
)

# Import all rows in a single transaction: one commit at the end instead of
# one per row, so a failing insert leaves the table untouched.
try:
    with connection.cursor() as cursor:
        cursor.executemany(query, rows)
    connection.commit()
finally:
    # Close the database connection even if an insert raised; closing without
    # a commit discards the pending transaction.
    connection.close()

## Person Has Interest

In [113]:
# Print column names, non-null counts and dtypes before mapping the columns
# onto the INSERT statement.
PersonHasInterest.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1992 entries, 0 to 1991
Data columns (total 2 columns):
 #   Column     Non-Null Count  Dtype
---  ------     --------------  -----
 0   Person.id  1992 non-null   int64
 1   Tag.id     1992 non-null   int64
dtypes: int64(2)
memory usage: 31.2 KB


In [114]:
query = """INSERT INTO PersonHasInterestTag (
    PersonID,
    TagID) 
    VALUES (%s, %s)"""

# One parameter tuple per DataFrame row: ('Person.id', 'Tag.id').
rows = list(
    PersonHasInterest[['Person.id', 'Tag.id']].itertuples(index=False, name=None)
)

# NOTE(review): credentials are hard-coded here; consider loading them from
# configuration instead.
connection = psycopg2.connect(
    dbname="social_3",
    user="postgres",
    password="password",
    host="localhost",
    port="5432",
)

# Import all rows in a single transaction: one commit at the end instead of
# one per row, so a failing insert leaves the table untouched.
try:
    with connection.cursor() as cursor:
        cursor.executemany(query, rows)
    connection.commit()
finally:
    # Close the database connection even if an insert raised; closing without
    # a commit discards the pending transaction.
    connection.close()

## Forum Has Tag

In [115]:
# Print column names, non-null counts and dtypes before mapping the columns
# onto the INSERT statement.
ForumHasTag.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2703 entries, 0 to 2702
Data columns (total 2 columns):
 #   Column    Non-Null Count  Dtype
---  ------    --------------  -----
 0   Forum.id  2703 non-null   int64
 1   Tag.id    2703 non-null   int64
dtypes: int64(2)
memory usage: 42.4 KB


In [116]:
query = """INSERT INTO ForumHasTag (
    ForumID,
    TagID) 
    VALUES (%s, %s)"""

# One parameter tuple per DataFrame row: ('Forum.id', 'Tag.id').
rows = list(
    ForumHasTag[['Forum.id', 'Tag.id']].itertuples(index=False, name=None)
)

# NOTE(review): credentials are hard-coded here; consider loading them from
# configuration instead.
connection = psycopg2.connect(
    dbname="social_3",
    user="postgres",
    password="password",
    host="localhost",
    port="5432",
)

# Import all rows in a single transaction: one commit at the end instead of
# one per row, so a failing insert leaves the table untouched.
try:
    with connection.cursor() as cursor:
        cursor.executemany(query, rows)
    connection.commit()
finally:
    # Close the database connection even if an insert raised; closing without
    # a commit discards the pending transaction.
    connection.close()