-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy path1_makejson.py
65 lines (43 loc) · 1.16 KB
/
1_makejson.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
# -*- coding: utf-8 -*-
"""
Created on Tue Jun 16 20:00:02 2013
@author: EeshanMalhotra
"""
import io

datapath = "C:/etc/Projects/Data/MovieTaglines/data/taglines2.list"
outpath = "C:/etc/Projects/Data/MovieTaglines/data/taglines.tsv"

# NOTE(review): the encoding of the input list is not visible from this
# script. latin-1 is assumed because it can decode any byte sequence and
# leaves pure-ASCII data unchanged -- confirm against the real data file.
INPUT_ENCODING = "latin-1"
OUTPUT_ENCODING = "utf-8"

TV_MARKER = "(TV)"
# Titles ending in one of these suffixes are not kept individually; all of
# their taglines are pooled under SHARED_TITLE.
# NOTE(review): presumably video, video-game and "{...}"-suffixed entries --
# confirm the intended meaning with the author.
SHARED_SUFFIXES = ("(V)", "(VG)", "}")
SHARED_TITLE = "(V/VG/SUS)"


def clean_title(line):
    """Return the dictionary key for a title line.

    ``line`` is a line that starts with "#" and has already had its line
    terminator removed. The first two characters (the "#" and the character
    after it) are dropped. A trailing "(TV)" marker is removed together with
    the single character in front of it. Titles ending in one of
    ``SHARED_SUFFIXES`` all map to ``SHARED_TITLE``; any other title keeps
    its year and has tabs replaced by spaces.
    """
    title = line[2:]
    if title.endswith(TV_MARKER):
        # Drop the marker and the one separator character preceding it.
        title = title[:-(len(TV_MARKER) + 1)]
    if title.endswith(SHARED_SUFFIXES):
        return SHARED_TITLE
    # The year is deliberately left in the key.
    return title.replace("\t", " ")


def collect_taglines(lines, progress_every=1000):
    """Group taglines by title and return ``{title: "tagline tagline ..."}``.

    ``lines`` is any iterable of text lines, with or without line
    terminators. A line starting with "#" begins a new title; every other
    non-empty line is a tagline for the most recent title, with its first
    character dropped (presumably a leading tab -- confirm against the data).
    Taglines that share a title are joined with a single space, in input
    order.

    Lines that appear before the first title have no title to attach to and
    are skipped. The running line count is printed every ``progress_every``
    lines; pass 0 to disable the progress output.
    """
    grouped = {}
    current_title = None
    for count, raw in enumerate(lines, 1):
        if progress_every and count % progress_every == 0:
            print(count)
        # Strip the terminator up front so the suffix checks and slices below
        # do not depend on whether the last line of the file ends in "\n".
        line = raw.rstrip("\r\n")
        if not line:
            continue
        if line[0] == "#":
            current_title = clean_title(line)
        elif current_title is not None:
            grouped.setdefault(current_title, []).append(line[1:])
    return {title: " ".join(parts) for title, parts in grouped.items()}


def main():
    """Read the tagline list at ``datapath`` and write a TSV to ``outpath``.

    Each output row is ``title<TAB>joined taglines``.
    """
    # io.open accepts an explicit encoding on both Python 2 and Python 3.
    with io.open(datapath, "r", encoding=INPUT_ENCODING) as source:
        taglines = collect_taglines(source)
    print("Generated")
    with io.open(outpath, "w", encoding=OUTPUT_ENCODING) as outfile:
        for title, text in taglines.items():
            outfile.write(title + "\t" + text + "\n")
    print("End")


if __name__ == "__main__":
    main()