Skip to content

Commit

Permalink
Pushing latest
Browse files Browse the repository at this point in the history
  • Loading branch information
Oscar-Rydh committed Apr 10, 2017
1 parent 8b354ef commit 50de61a
Show file tree
Hide file tree
Showing 6 changed files with 173 additions and 1 deletion.
1 change: 1 addition & 0 deletions .gitignore
Expand Up @@ -7,3 +7,4 @@ startup/PID_DIR/*
*.log
web/.idea
twitter-geo.dump
allCountries.txt
22 changes: 22 additions & 0 deletions database/create_tables.sql
Expand Up @@ -77,5 +77,27 @@ CREATE TABLE IF NOT EXISTS tweet_hashtags (
FOREIGN KEY(tweet_id) REFERENCES tweets(id)
);

-- Lookup table of geographic place records.
-- Rows are inserted by Database.loadCountries (database/database.py), which
-- parse_countries.py calls once per tab-separated line of allCountries.txt;
-- the column order below matches the positional field order of that file.
-- No primary key, uniqueness constraint or index is declared on this table.
CREATE TABLE IF NOT EXISTS geonames (
geonameid INT,
name VARCHAR (200),
asciiname VARCHAR (200),
alternatenames VARCHAR (10000),
-- NOTE(review): DECIMAL is declared without precision/scale. PostgreSQL keeps
-- the fractional digits in that case, but MySQL defaults to DECIMAL(10,0) and
-- would round coordinates to whole degrees -- confirm the target database.
latitude DECIMAL,
longitude DECIMAL,
feature_class CHAR(1),
feature_code VARCHAR (10),
country_code VARCHAR (2),
cc2 VARCHAR (200),
admin1_code VARCHAR (20),
admin2_code VARCHAR (80),
admin3_code VARCHAR (20),
admin4_code VARCHAR (20),
population BIGINT,
elevation INTEGER,
dem INTEGER,
timezone VARCHAR (40),
modification_date date
);

-- Convenience view over the tweets whose user_id is 25073877.
-- OR REPLACE keeps this script re-runnable, in line with the IF NOT EXISTS
-- guards on the CREATE TABLE statements above; a plain CREATE VIEW aborts
-- the script on every run after the first because the view already exists.
CREATE OR REPLACE VIEW trumps_tweets AS
SELECT * FROM tweets WHERE user_id = 25073877;
42 changes: 41 additions & 1 deletion database/database.py
Expand Up @@ -292,4 +292,44 @@ def updateDetectedLanguage(self, id, detected_language):
data = [detected_language, id]
cur.execute(statement, data)
self.conn.commit()
cur.close()
cur.close()



def loadCountries(self,
                  geonameid=None,
                  name=None,
                  asciiname=None,
                  alternatenames=None,
                  latitude=None,
                  longitude=None,
                  feature_class=None,
                  feature_code=None,
                  country_code=None,
                  cc2=None,
                  admin1_code=None,
                  admin2_code=None,
                  admin3_code=None,
                  admin4_code=None,
                  population=None,
                  elevation=None,
                  dem=None,
                  timezone=None,
                  modification_date=None):
    """Insert a single row into the ``geonames`` table and commit it.

    Each parameter maps one-to-one onto the ``geonames`` column of the same
    name and defaults to ``None``. The values are handed to the driver as
    bound parameters, never interpolated into the SQL text.

    The row is committed immediately, so every call is its own transaction.

    Raises:
        Whatever the database driver raises from ``execute`` or ``commit``.
        The transaction is rolled back and the cursor closed before the
        exception propagates.
    """
    statement = """
        INSERT INTO geonames
        (geonameid, name, asciiname, alternatenames, latitude,
        longitude, feature_class, feature_code, country_code,
        cc2, admin1_code, admin2_code, admin3_code, admin4_code,
        population, elevation, dem, timezone, modification_date)
        VALUES(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);
    """
    # Order must match the column list in the statement above.
    values = (geonameid, name, asciiname, alternatenames, latitude, longitude,
              feature_class, feature_code, country_code,
              cc2, admin1_code, admin2_code, admin3_code, admin4_code,
              population, elevation, dem, timezone, modification_date)

    cur = self.conn.cursor()
    try:
        cur.execute(statement, values)
        self.conn.commit()
    except Exception:
        # Do not leave a failed statement pending on the shared connection;
        # otherwise later calls would run inside the broken transaction.
        self.conn.rollback()
        raise
    finally:
        # Release the cursor on success and on failure alike.
        cur.close()
28 changes: 28 additions & 0 deletions parse_countries.py
@@ -0,0 +1,28 @@
from database.database import Database

# Loads every row of the tab-separated allCountries.txt dump into the
# database through Database.loadCountries, one committed insert per line.

# Number of tab-separated columns expected on each line; equals the number of
# positional parameters Database.loadCountries accepts.
EXPECTED_COLUMNS = 19

db = Database("twitter-geo")

print ("Connection Established")

progress = 0

# The context manager closes the dump even if an insert fails part-way.
with open("allCountries.txt", 'r') as dump_file:
    for line_number, line in enumerate(dump_file, 1):
        # Drop the line terminator *before* splitting so that an empty final
        # column is seen as '' and converted to None like every other column.
        fields = [field if field != '' else None
                  for field in line.rstrip('\n').split('\t')]

        # Fail loudly on a malformed row instead of inserting shifted columns
        # or dying with an IndexError that carries no context.
        if len(fields) != EXPECTED_COLUMNS:
            raise ValueError(
                "allCountries.txt line %d: expected %d tab-separated columns, found %d"
                % (line_number, EXPECTED_COLUMNS, len(fields)))

        db.loadCountries(*fields)

        progress += 1
        if progress % 100000 == 0:
            print ("Processed lines: ", progress)
1 change: 1 addition & 0 deletions placement_methods/place_with_contains.py
@@ -0,0 +1 @@

80 changes: 80 additions & 0 deletions web/database_relation_searcher.py
@@ -0,0 +1,80 @@
from database import Database

class Database(Database):
    """Adds aggregate counts over the ``tweets`` table to the base ``Database``.

    The class reuses (and therefore shadows) the name of the imported base
    class. Every method expects ``self.conn`` to be an open DB-API connection;
    presumably the base class sets it up -- confirm against database.py.
    """

    # retweeted_id --> the tweet is a retweet
    # in_reply_to_user_id --> someone has been mentioned
    # in_reply_to_status_id --> the tweet is a reply to another tweet
    # original_tweet_retweet_count --> only present on commented retweets
    # A retweet is a "clean" retweet if the attribute "original_tweet_retweet_count" is not null
    # A retweet is a "commented" retweet if the attribute "original_tweet_retweet_count" is null
    # NOTE(review): the two lines above contradict the line before them, and
    # the queries below classify retweets by in_reply_to_* columns instead --
    # confirm which definition is intended.

    # Statements for extracting statistics
    #SELECT count(*) FROM tweets WHERE retweeted_id IS NOT NULL AND in_reply_to_status_id IS NOT NULL AND
    #SELECT count(*) FROM tweets WHERE retweeted_id IS NOT NULL AND in_reply_to_status_id IS NOT NULL AND
    #SELECT count(*) FROM tweets WHERE retweeted_id IS NOT NULL AND in_reply_to_status_id IS NULL AND in_
    #SELECT count(*) FROM tweets WHERE retweeted_id IS NOT NULL AND in_reply_to_status_id IS NULL AND in_
    #SELECT count(*) FROM tweets WHERE retweeted_id IS NULL AND in_reply_to_status_id IS NOT NULL AND in_
    #SELECT count(*) FROM tweets WHERE retweeted_id IS NULL AND in_reply_to_status_id IS NOT NULL AND in_
    #SELECT count(*) FROM tweets WHERE retweeted_id IS NULL AND in_reply_to_status_id IS NULL AND in_repl
    #SELECT count(*) FROM tweets WHERE retweeted_id IS NULL AND in_reply_to_status_id IS NULL AND in_repl

    def _fetch_relation_count(self, statement):
        """Execute a parameterless ``SELECT count(*)`` and return its value.

        The cursor is closed even when the query raises, so a failing
        statistic does not leak a cursor on the shared connection.
        """
        cur = self.conn.cursor()
        try:
            cur.execute(statement)
            row = cur.fetchone()
        finally:
            cur.close()
        return row[0]

    def get_total_clean_retweet_count(self):
        """Count retweets that have neither a reply-to status nor a reply-to user."""
        statement = """
            SELECT count(*) FROM tweets WHERE retweeted_id IS NOT NULL
            AND in_reply_to_status_id IS NULL
            AND in_reply_to_user_id IS NULL;
        """
        return self._fetch_relation_count(statement)

    def get_total_commented_retweet_count(self):
        """Count retweets with a reply-to user but no reply-to status."""
        statement = """
            SELECT count(*) FROM tweets
            WHERE retweeted_id IS NOT NULL
            AND in_reply_to_status_id IS NULL
            AND in_reply_to_user_id IS NOT NULL
        """
        return self._fetch_relation_count(statement)

    def get_total_reply_count(self):
        """Count tweets that have a retweeted id, a reply-to status and a reply-to user."""
        # NOTE(review): this only counts rows where retweeted_id IS NOT NULL,
        # i.e. replies that are also retweets. If plain replies were meant,
        # the first predicate should probably be IS NULL -- confirm.
        statement = """
            SELECT count(*) FROM tweets
            WHERE retweeted_id IS NOT NULL
            AND in_reply_to_status_id IS NOT NULL
            AND in_reply_to_user_id IS NOT NULL;
        """
        return self._fetch_relation_count(statement)

    def get_total_mention_count(self):
        """Count tweets whose text starts with '@realDonaldTrump' (case-insensitive)
        and whose reply-to user is someone other than 25073877."""
        # NOTE(review): rows with a NULL in_reply_to_user_id never satisfy
        # the <> comparison and are therefore excluded -- confirm intended.
        statement = """
            select count(*) from tweets
            where in_reply_to_user_id <> 25073877
            AND text ILIKE '@realDonaldTrump%';
        """
        return self._fetch_relation_count(statement)

0 comments on commit 50de61a

Please sign in to comment.