<a href="https://colab.research.google.com/github/A-R-TAQI/semantic_analysis/blob/main/SemanticWeb.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install ntscraper

Collecting ntscraper
  Downloading ntscraper-0.3.6-py3-none-any.whl (10 kB)
Installing collected packages: ntscraper
Successfully installed ntscraper-0.3.6


In [None]:
import pandas as pd
from ntscraper import Nitter

In [None]:
# Build the Nitter scraper client that the tweet-fetching cell uses.
# NOTE(review): the positional 0 is presumably ntscraper's log level — confirm
# against the ntscraper docs. The recorded output shows that construction tests
# the available instances, which took roughly 40 seconds.
scraper = Nitter(0)

Testing instances: 100%|██████████| 31/31 [00:40<00:00,  1.31s/it]


In [None]:
# Request 1000 tweets for the "ElectricVehicles" hashtag from the scraper.
tweets = scraper.get_tweets("ElectricVehicles", mode = 'hashtag', number=1000)

# Flatten each tweet dict into a [name, text, date, likes, comments] row.
# A comprehension keeps the per-row list anonymous, so the name `data` is not
# bound to a plain list here before it is used for the DataFrame.
final_tweets = [
    [
        tweet['user']['name'],
        tweet['text'],
        tweet['date'],
        tweet['stats']['likes'],
        tweet['stats']['comments'],
    ]
    for tweet in tweets['tweets']
]



In [None]:
# Tabulate the scraped rows under explicit column names.
data = pd.DataFrame(final_tweets, columns =['name','text','date','likes','comments'])
# Save under the file name the sentiment and RDF cells read back
# ('ElectricVehicles.csv'). index=False keeps the row index out of the CSV so
# the reload does not pick up a spurious 'Unnamed: 0' column.
data.to_csv('ElectricVehicles.csv', index=False)

In [None]:
# Add a sentiment label column to the tweet dataset.
from textblob import TextBlob
# Reload the tweet table from the saved CSV so this cell works from the file on disk.
data = pd.read_csv('ElectricVehicles.csv')

def get_sentiment(text):
    """Classify a piece of text by the sign of its TextBlob polarity.

    Args:
        text: The text to score. Values that are not strings (for example the
            NaN that pandas produces for an empty CSV field, or None) are
            treated as having no sentiment.

    Returns:
        1 if the polarity is positive, 0 if it is exactly zero (or the input
        is not a string), and -1 if it is negative.
    """
    # Missing text arrives as a float NaN after a CSV round trip; TextBlob only
    # accepts strings, so label such rows neutral instead of raising mid-apply.
    if not isinstance(text, str):
        return 0

    # Read the polarity once and branch on its sign.
    polarity = TextBlob(text).sentiment.polarity
    if polarity > 0:  # Positive sentiment
        return 1
    if polarity == 0:  # Neutral sentiment
        return 0
    return -1  # Negative sentiment

# Score every tweet's text and attach the label as the 'sentiments' column.
data = data.assign(sentiments=data['text'].map(get_sentiment))

# Write the labelled table back over the same CSV, leaving the row index out.
data.to_csv('ElectricVehicles.csv', index=False)


In [None]:
!pip install rdflib



In [None]:
import pandas as pd
from rdflib import Graph, Literal, Namespace, RDF, URIRef, XSD
from urllib.parse import quote
from datetime import datetime

# Base namespace under which every resource and property URI below is minted.
ex = Namespace("http://pavan.org/")
# Empty in-memory graph that the triples are added to.
g = Graph()
# Tweet table to export; the loop below reads its name, text, date, likes,
# comments and sentiments columns.
data = pd.read_csv('ElectricVehicles.csv')

# Function to convert date string to ISO 8601 format
def convert_to_iso_date(date_str):
    """Reformat a scraped tweet timestamp as an ISO 8601 string ending in 'Z'.

    Args:
        date_str: Timestamp text such as 'Dec 15, 2023 · 8:26 AM UTC'.

    Returns:
        The same instant written as 'YYYY-MM-DDTHH:MM:SSZ',
        e.g. '2023-12-15T08:26:00Z'.

    Raises:
        ValueError: If ``date_str`` does not match the expected layout.
    """
    scraped_layout = '%b %d, %Y · %I:%M %p %Z'
    iso_layout = '%Y-%m-%dT%H:%M:%SZ'
    return datetime.strptime(date_str, scraped_layout).strftime(iso_layout)

# Turn every CSV row into a User node and a Tweet node, linked by ex.postedBy.
# NOTE(review): both URIs are keyed by the row index, so an author with several
# tweets gets several User nodes — confirm this is intended.
for row_id, record in data.iterrows():
    author = URIRef(ex + quote(f"user_{row_id}"))
    tweet = URIRef(ex + quote(f"tweet_{row_id}"))

    # Author node: its type and display name.
    for predicate, value in (
        (RDF.type, ex.User),
        (ex.hasName, Literal(record['name'])),
    ):
        g.add((author, predicate, value))

    # xsd:dateTime needs the ISO form rather than the scraped display string.
    posted_at = convert_to_iso_date(record['date'])

    # Tweet node: type, content, typed metadata, and the link back to its author.
    tweet_properties = (
        (RDF.type, ex.Tweet),
        (ex.hasText, Literal(record['text'])),
        (ex.hasDate, Literal(posted_at, datatype=XSD.dateTime)),
        (ex.hasLikes, Literal(record['likes'], datatype=XSD.integer)),
        (ex.hasComments, Literal(record['comments'], datatype=XSD.integer)),
        (ex.hasSentiment, Literal(record['sentiments'], datatype=XSD.integer)),
        (ex.postedBy, author),
    )
    for predicate, value in tweet_properties:
        g.add((tweet, predicate, value))

# Write the finished graph to disk as RDF/XML.
rdf_output = g.serialize(destination='ElectricVehicles.rdf', format='xml')
