This repository has been archived by the owner on Sep 7, 2020. It is now read-only.
/
ig-data-extractor.py
53 lines (43 loc) · 1.57 KB
/
ig-data-extractor.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
import csv
import json
import os.path
import sys
import urllib.parse
import urllib.request
# Input/output paths come from the first two command-line arguments when
# given; otherwise the default file names below are used.
in_txt_name = "hashtags_in.txt" if len(sys.argv) < 2 else sys.argv[1]
out_csv_name = "hashtags_out.csv" if len(sys.argv) < 3 else sys.argv[2]
# Hashtag objects are appended here while the input file is processed.
hashtags_list = list()
# Not referenced by any other statement visible in this file.
string_out = str()
class Hashtag:
    """An Instagram hashtag together with its reported post count.

    Attributes:
        name: Hashtag text exactly as supplied by the caller.
        posts_count: Post count read from the Instagram response; stays 0
            until update_posts_count() has completed successfully.
    """

    # Class-level defaults, so both attributes are readable before any update.
    name = ""
    posts_count = 0

    def __init__(self, name):
        """Store the hashtag text; no network access happens here."""
        self.name = name

    def _tag_url(self):
        """Return the JSON endpoint URL for this hashtag.

        The name is percent-encoded so that non-ASCII or reserved characters
        (accents, spaces, '/', '#', ...) cannot break or redirect the request.
        """
        encoded_name = urllib.parse.quote(self.name, safe="")
        return "https://www.instagram.com/explore/tags/" + encoded_name + "/?__a=1"

    def update_posts_count(self):
        """Fetch the hashtag's post count over HTTP and store it.

        Raises:
            urllib.error.URLError: If the request cannot be completed
                (HTTPError, a subclass, for HTTP error statuses).
            ValueError: If the response body is not valid JSON.
            KeyError: If the JSON lacks the expected
                graphql/hashtag/edge_hashtag_to_media/count path.
        """
        # Use a distinct name for the response instead of rebinding the URL.
        with urllib.request.urlopen(self._tag_url()) as response:
            jsondata = json.loads(response.read())
        self.posts_count = jsondata["graphql"]["hashtag"]["edge_hashtag_to_media"]["count"]

    def get_csv_row_array(self):
        """Return [name, posts_count], the row layout used for the CSV output."""
        return [self.name, self.posts_count]
def write_csv(list, out_path=None):
    """Write the given hashtags to a CSV file, preceded by a header row.

    Args:
        list: Iterable of objects exposing get_csv_row_array(); rows are
            written in iteration order. The name shadows the builtin but is
            kept so that existing callers are unaffected.
        out_path: Destination file path. When omitted, the module-level
            out_csv_name is used.
    """
    if out_path is None:
        out_path = out_csv_name
    print("Writing csv...")
    # newline="" lets the csv module control line endings; without it,
    # platforms that translate "\n" end up with blank rows between records.
    with open(out_path, "w", newline="") as csv_file:
        csv_writer = csv.writer(csv_file)
        csv_writer.writerow(["Hashtag", "Posts Count"])
        # Iterate the argument rather than the global hashtags_list, so the
        # ordering chosen by the caller is the one that gets written.
        csv_writer.writerows(hashtag.get_csv_row_array() for hashtag in list)
# Script body: read one hashtag per line from the input file, fetch each
# post count, then write the results to CSV ordered by descending count.
if not os.path.exists(in_txt_name):
    print("Input file not found!")
    # sys.exit() rather than quit(): quit() is an interactive helper injected
    # by the site module, and a non-zero status reports the failure.
    sys.exit(1)

print("Opening input file...")
with open(in_txt_name, "r") as txt_file:
    for row in txt_file:
        hashtag_str = row.strip()
        if not hashtag_str:
            # Skip blank lines; an empty name would request a malformed URL.
            continue
        hashtag = Hashtag(hashtag_str)
        # flush=True so the progress text appears before the request blocks.
        print("Working on #" + hashtag_str + "...", end=" ", flush=True)
        hashtag.update_posts_count()
        hashtags_list.append(hashtag)
        print("OK")

print("Sorting list...")
hashtags_list_sorted = sorted(hashtags_list, key=lambda h: h.posts_count, reverse=True)
write_csv(hashtags_list_sorted)