Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #14 from Craigson/master
adding parsing scripts
- Loading branch information
Showing
3 changed files
with
341 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,217 @@ | ||
{ | ||
"images": [ | ||
"1007068097548587008.jpg", | ||
"1013258241213648896.jpg", | ||
"1019905139470929920.jpg", | ||
"1019939664372682752.jpg", | ||
"1020122231785005056.jpg", | ||
"1020241290031517696.jpg", | ||
"1020976095584980992.jpg", | ||
"1025547332575543296.jpg", | ||
"1035635687136845824.jpg", | ||
"1039635691702996992.jpg", | ||
"1040111834935734272.jpg", | ||
"1040125918204628992.jpg", | ||
"1040127942753153024.jpg", | ||
"1040147069832474624.jpg", | ||
"1040405032585035776.jpg", | ||
"1040420304675352576.jpg", | ||
"1043730256063787008.jpg", | ||
"1046231588012253184.jpg", | ||
"1049042149112778752.jpg", | ||
"1050225431502376960.jpg", | ||
"1050229898444443648.jpg", | ||
"1050235287449866240.jpg", | ||
"1050514917402247168.jpg", | ||
"1055259055490560000.jpg", | ||
"1059878013195931648.jpg", | ||
"1068651596910682112.jpg", | ||
"1070774575283200000.jpg", | ||
"1073986431099228160.jpg", | ||
"1090430079219470336.jpg", | ||
"1091042672346603520.jpg", | ||
"1091042818220277760.jpg", | ||
"1091045961490644992.jpg", | ||
"1091046696353505280.jpg", | ||
"1091048782302720000.jpg", | ||
"1091048895271890944.jpg", | ||
"1091069991098691584.jpg", | ||
"1091083241534177280.jpg", | ||
"1091085478310326272.jpg", | ||
"1094085212620857344.jpg", | ||
"1095868645537660928.jpg", | ||
"1096504166748221440.jpg", | ||
"1096873038932324352.jpg", | ||
"1097664722628743168.jpg", | ||
"1101967710835081216.jpg", | ||
"1110660025753464832.jpg", | ||
"1114630591703588864.jpg", | ||
"1116111005714407424.jpg", | ||
"1116443860722216960.jpg", | ||
"1121196629995757568.jpg", | ||
"1124790864036937728.jpg", | ||
"1133261999812300800.jpg", | ||
"1133935783426813952.jpg", | ||
"1133955997380370432.jpg", | ||
"1133972325675753472.jpg", | ||
"1133979578382340096.jpg", | ||
"1135041130250240000.jpg", | ||
"1140076513845239808.jpg", | ||
"1143761385604395008.jpg", | ||
"1147314207537475584.jpg", | ||
"1147374540792406016.jpg", | ||
"1149448821663739904.jpg", | ||
"1165111183569838080.jpg", | ||
"1169505845705613312.jpg", | ||
"1176737401784123392.jpg", | ||
"1177784153018912768.jpg", | ||
"1178137033974386688.jpg", | ||
"1180325589010337792.jpg", | ||
"1187213020712390656.jpg", | ||
"1188238860266913792.jpg", | ||
"1190608665925812224.jpg", | ||
"1191810572225077248.jpg", | ||
"1231332402262433792.jpg", | ||
"1279238145321906176.jpg", | ||
"1295897900823805952.jpg", | ||
"1298885472571293696.jpg", | ||
"1298887117837107200.jpg", | ||
"1298891268759425024.jpg", | ||
"944395338108780544.jpg", | ||
"947215024257155072.jpg", | ||
"950588614461517824.jpg", | ||
"950888148634886144.jpg", | ||
"951219255045681152.jpg", | ||
"956641980211953664.jpg", | ||
"962014169299869696.jpg", | ||
"962014304108957696.jpg", | ||
"962014593339867136.jpg", | ||
"962014611627020288.jpg", | ||
"962014996408254464.jpg", | ||
"962015327036846080.jpg", | ||
"962017115567702016.jpg", | ||
"962752927913648128.jpg", | ||
"963775397034647552.jpg", | ||
"966397657096560640.jpg", | ||
"966428278342955008.jpg", | ||
"966428366574108672.jpg", | ||
"966428668287193088.jpg", | ||
"966429185021214720.jpg", | ||
"973688088234795008.jpg", | ||
"973689639003361280.jpg", | ||
"973692035951677440.jpg", | ||
"989973831039012864.jpg", | ||
"989974204089040896.jpg", | ||
"989974338809823232.jpg", | ||
"990021559018561536.jpg", | ||
"990041551185338368.jpg", | ||
"avatar.jpg" | ||
], | ||
"videos": [ | ||
"1007068097548587008.mp4", | ||
"1013258241213648896.mp4", | ||
"1019905139470929920.mp4", | ||
"1019939664372682752.mp4", | ||
"1020122231785005056.mp4", | ||
"1020241290031517696.mp4", | ||
"1020976095584980992.mp4", | ||
"1025547332575543296.mp4", | ||
"1035635687136845824.mp4", | ||
"1039635691702996992.mp4", | ||
"1040111834935734272.mp4", | ||
"1040125918204628992.mp4", | ||
"1040127942753153024.mp4", | ||
"1040147069832474624.mp4", | ||
"1040405032585035776.mp4", | ||
"1040420304675352576.mp4", | ||
"1043730256063787008.mp4", | ||
"1046231588012253184.mp4", | ||
"1049042149112778752.mp4", | ||
"1050225431502376960.mp4", | ||
"1050229898444443648.mp4", | ||
"1050235287449866240.mp4", | ||
"1050514917402247168.mp4", | ||
"1055259055490560000.mp4", | ||
"1059878013195931648.mp4", | ||
"1068651596910682112.mp4", | ||
"1070774575283200000.mp4", | ||
"1073986431099228160.mp4", | ||
"1090430079219470336.mp4", | ||
"1091042672346603520.mp4", | ||
"1091042818220277760.mp4", | ||
"1091045961490644992.mp4", | ||
"1091046696353505280.mp4", | ||
"1091048782302720000.mp4", | ||
"1091048895271890944.mp4", | ||
"1091069991098691584.mp4", | ||
"1091083241534177280.mp4", | ||
"1091085478310326272.mp4", | ||
"1094085212620857344.mp4", | ||
"1095868645537660928.mp4", | ||
"1096504166748221440.mp4", | ||
"1096873038932324352.mp4", | ||
"1097664722628743168.mp4", | ||
"1101967710835081216.mp4", | ||
"1110660025753464832.mp4", | ||
"1114630591703588864.mp4", | ||
"1116111005714407424.mp4", | ||
"1116443860722216960.mp4", | ||
"1121196629995757568.mp4", | ||
"1124790864036937728.mp4", | ||
"1133261999812300800.mp4", | ||
"1133935783426813952.mp4", | ||
"1133955997380370432.mp4", | ||
"1133972325675753472.mp4", | ||
"1133979578382340096.mp4", | ||
"1135041130250240000.mp4", | ||
"1140076513845239808.mp4", | ||
"1143761385604395008.mp4", | ||
"1147314207537475584.mp4", | ||
"1147374540792406016.mp4", | ||
"1149448821663739904.mp4", | ||
"1165111183569838080.mp4", | ||
"1169505845705613312.mp4", | ||
"1176737401784123392.mp4", | ||
"1177784153018912768.mp4", | ||
"1178137033974386688.mp4", | ||
"1180325589010337792.mp4", | ||
"1187213020712390656.mp4", | ||
"1188238860266913792.mp4", | ||
"1190608665925812224.mp4", | ||
"1191810572225077248.mp4", | ||
"1231332402262433792.mp4", | ||
"1279238145321906176.mp4", | ||
"1295897900823805952.mp4", | ||
"1298885472571293696.mp4", | ||
"1298887117837107200.mp4", | ||
"1298891268759425024.mp4", | ||
"944395338108780544.mp4", | ||
"947215024257155072.mp4", | ||
"950588614461517824.mp4", | ||
"950888148634886144.mp4", | ||
"951219255045681152.mp4", | ||
"956641980211953664.mp4", | ||
"962014169299869696.mp4", | ||
"962014304108957696.mp4", | ||
"962014593339867136.mp4", | ||
"962014611627020288.mp4", | ||
"962014996408254464.mp4", | ||
"962015327036846080.mp4", | ||
"962017115567702016.mp4", | ||
"962752927913648128.mp4", | ||
"963775397034647552.mp4", | ||
"966397657096560640.mp4", | ||
"966428278342955008.mp4", | ||
"966428366574108672.mp4", | ||
"966428668287193088.mp4", | ||
"966429185021214720.mp4", | ||
"973688088234795008.mp4", | ||
"973689639003361280.mp4", | ||
"973692035951677440.mp4", | ||
"989973831039012864.mp4", | ||
"989974204089040896.mp4", | ||
"989974338809823232.mp4", | ||
"990021559018561536.mp4", | ||
"990041551185338368.mp4" | ||
] | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,93 @@ | ||
/*
    This script parses the tweets.csv file into a usable tweets.json file.

    CSV LAYOUT IS AS FOLLOWS:
    [0] tweet_id,
    [1] in_reply_to_status_id,
    [2] in_reply_to_user_id,
    [3] timestamp,
    [4] source,
    [5] text,
    [6] retweeted_status_id,
    [7] retweeted_status_user_id,
    [8] retweeted_status_timestamp,
    [9] expanded_urls

    On any failure (unreadable input, CSV parse error, failed write) the
    script reports the error on stderr and sets a non-zero exit code instead
    of claiming success.
*/

var fs = require('fs');
var parse = require('csv-parse');
var async = require('async');

var inputFile = 'tweets.csv';
var outputFile = './tweets.json';

// name of the last column as it appears in the CSV header row
var URL_COLUMN_HEADER = 'expanded_urls';

// create an object to hold the tweets
var resultsJson = {
    tweets: []
};

/*
    Report a fatal problem and make the process exit with a failure status.
    process.exitCode is used rather than process.exit() so pending output is
    still flushed.
*/
function fail(message) {
    console.error(message);
    process.exitCode = 1;
}

/*
    decodeURI throws a URIError on malformed percent sequences (e.g. a lone
    '%'). One odd "source" value should not abort the whole conversion, so in
    that case the raw value is kept unchanged.
*/
function safeDecodeURI(value) {
    try {
        return decodeURI(value);
    } catch (e) {
        if (e instanceof URIError) {
            return value;
        }
        throw e;
    }
}

/*
    Turn the expanded_urls field into an array of URLs.
    Returns [] for an empty/missing field and for the header cell itself.
    split(',') already yields a one-element array when there is no comma, so
    no separate single-URL branch is needed.
*/
function extractUrls(field) {
    if (!field || field === URL_COLUMN_HEADER) {
        return [];
    }
    return field.split(',');
}

/*
    Build a tweet object with all relevant data from one parsed CSV row
    (an array of column values, indexed as in the layout above).
*/
function toTweet(line) {
    return {
        tweet_id: line[0],
        in_reply_to_status_id: line[1],
        in_reply_to_user_id: line[2],
        timestamp: line[3],
        source: safeDecodeURI(line[4]),
        text: line[5],
        retweeted_status_id: line[6],
        retweeted_status_user_id: line[7],
        retweeted_status_timestamp: line[8],
        expanded_urls: extractUrls(line[9])
    };
}

// parse the .csv file
var parser = parse({delimiter: ','}, function (err, data) {

    // without this check a parse failure would still write an empty tweets.json
    if (err) {
        fail('Failed to parse ' + inputFile + ': ' + err);
        return;
    }

    // use async to run through each line sequentially
    // NOTE(review): the CSV header row is converted like any other row and so
    // ends up as the first entry of "tweets" -- confirm whether consumers of
    // tweets.json rely on that before filtering it out.
    async.eachSeries(data, function (line, callback) {

        // push the tweet object into the tweets array
        resultsJson.tweets.push(toTweet(line));

        callback();

    }, function (err) {
        if (err) {
            fail('Failed to convert tweets: ' + err);
            return;
        }

        console.log('Saving Json!');

        // convert the resultsJson object to a string
        var json = JSON.stringify(resultsJson, null, 2);

        // write the file to disk
        fs.writeFile(outputFile, json, function (err) {
            // only report success when the write actually succeeded
            if (err) {
                fail('Err ' + err);
                return;
            }
            console.log('Successfully created tweets.json');
        });

    });
});

fs.createReadStream(inputFile)
    // a missing/unreadable input file surfaces as an 'error' event on the stream
    .on('error', function (err) {
        fail('Failed to read ' + inputFile + ': ' + err);
    })
    .pipe(parser);
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
/*
    This quick and dirty script lists the images and videos folders of the
    Vine archive and creates a standalone json file (vine_data.json)
    containing the filenames:

        { "images": [...], "videos": [...] }

    fs.readdirSync throws if either folder is missing, which aborts the
    script before anything is written.
*/

var fs = require('fs');

const imageFolder = './VINE-WH-archive_1421922769494487040/images/';
const videosFolder = './VINE-WH-archive_1421922769494487040/videos/';

// every directory entry is listed as-is; no filtering by extension is done
var resultsJson = {
    images: fs.readdirSync(imageFolder),
    videos: fs.readdirSync(videosFolder)
};

// convert the resultsJson object to a string
var json = JSON.stringify(resultsJson, null, 2);

console.log("writing json file");

// write the file to disk
fs.writeFile('./vine_data.json', json, function(err) {
    // only report success when the write actually succeeded
    if (err) {
        console.error('Err ' + err);
        process.exitCode = 1;
        return;
    }
    console.log('Successfully created vine_data.json');
});