Skip to content

Commit

Permalink
Merge pull request #14 from Craigson/master
Browse files Browse the repository at this point in the history
adding parsing scripts
  • Loading branch information
shiffman committed Jan 6, 2017
2 parents 656bd64 + 3a37b75 commit 25b8486
Show file tree
Hide file tree
Showing 3 changed files with 341 additions and 0 deletions.
217 changes: 217 additions & 0 deletions data/vine_data.json
@@ -0,0 +1,217 @@
{
"images": [
"1007068097548587008.jpg",
"1013258241213648896.jpg",
"1019905139470929920.jpg",
"1019939664372682752.jpg",
"1020122231785005056.jpg",
"1020241290031517696.jpg",
"1020976095584980992.jpg",
"1025547332575543296.jpg",
"1035635687136845824.jpg",
"1039635691702996992.jpg",
"1040111834935734272.jpg",
"1040125918204628992.jpg",
"1040127942753153024.jpg",
"1040147069832474624.jpg",
"1040405032585035776.jpg",
"1040420304675352576.jpg",
"1043730256063787008.jpg",
"1046231588012253184.jpg",
"1049042149112778752.jpg",
"1050225431502376960.jpg",
"1050229898444443648.jpg",
"1050235287449866240.jpg",
"1050514917402247168.jpg",
"1055259055490560000.jpg",
"1059878013195931648.jpg",
"1068651596910682112.jpg",
"1070774575283200000.jpg",
"1073986431099228160.jpg",
"1090430079219470336.jpg",
"1091042672346603520.jpg",
"1091042818220277760.jpg",
"1091045961490644992.jpg",
"1091046696353505280.jpg",
"1091048782302720000.jpg",
"1091048895271890944.jpg",
"1091069991098691584.jpg",
"1091083241534177280.jpg",
"1091085478310326272.jpg",
"1094085212620857344.jpg",
"1095868645537660928.jpg",
"1096504166748221440.jpg",
"1096873038932324352.jpg",
"1097664722628743168.jpg",
"1101967710835081216.jpg",
"1110660025753464832.jpg",
"1114630591703588864.jpg",
"1116111005714407424.jpg",
"1116443860722216960.jpg",
"1121196629995757568.jpg",
"1124790864036937728.jpg",
"1133261999812300800.jpg",
"1133935783426813952.jpg",
"1133955997380370432.jpg",
"1133972325675753472.jpg",
"1133979578382340096.jpg",
"1135041130250240000.jpg",
"1140076513845239808.jpg",
"1143761385604395008.jpg",
"1147314207537475584.jpg",
"1147374540792406016.jpg",
"1149448821663739904.jpg",
"1165111183569838080.jpg",
"1169505845705613312.jpg",
"1176737401784123392.jpg",
"1177784153018912768.jpg",
"1178137033974386688.jpg",
"1180325589010337792.jpg",
"1187213020712390656.jpg",
"1188238860266913792.jpg",
"1190608665925812224.jpg",
"1191810572225077248.jpg",
"1231332402262433792.jpg",
"1279238145321906176.jpg",
"1295897900823805952.jpg",
"1298885472571293696.jpg",
"1298887117837107200.jpg",
"1298891268759425024.jpg",
"944395338108780544.jpg",
"947215024257155072.jpg",
"950588614461517824.jpg",
"950888148634886144.jpg",
"951219255045681152.jpg",
"956641980211953664.jpg",
"962014169299869696.jpg",
"962014304108957696.jpg",
"962014593339867136.jpg",
"962014611627020288.jpg",
"962014996408254464.jpg",
"962015327036846080.jpg",
"962017115567702016.jpg",
"962752927913648128.jpg",
"963775397034647552.jpg",
"966397657096560640.jpg",
"966428278342955008.jpg",
"966428366574108672.jpg",
"966428668287193088.jpg",
"966429185021214720.jpg",
"973688088234795008.jpg",
"973689639003361280.jpg",
"973692035951677440.jpg",
"989973831039012864.jpg",
"989974204089040896.jpg",
"989974338809823232.jpg",
"990021559018561536.jpg",
"990041551185338368.jpg",
"avatar.jpg"
],
"videos": [
"1007068097548587008.mp4",
"1013258241213648896.mp4",
"1019905139470929920.mp4",
"1019939664372682752.mp4",
"1020122231785005056.mp4",
"1020241290031517696.mp4",
"1020976095584980992.mp4",
"1025547332575543296.mp4",
"1035635687136845824.mp4",
"1039635691702996992.mp4",
"1040111834935734272.mp4",
"1040125918204628992.mp4",
"1040127942753153024.mp4",
"1040147069832474624.mp4",
"1040405032585035776.mp4",
"1040420304675352576.mp4",
"1043730256063787008.mp4",
"1046231588012253184.mp4",
"1049042149112778752.mp4",
"1050225431502376960.mp4",
"1050229898444443648.mp4",
"1050235287449866240.mp4",
"1050514917402247168.mp4",
"1055259055490560000.mp4",
"1059878013195931648.mp4",
"1068651596910682112.mp4",
"1070774575283200000.mp4",
"1073986431099228160.mp4",
"1090430079219470336.mp4",
"1091042672346603520.mp4",
"1091042818220277760.mp4",
"1091045961490644992.mp4",
"1091046696353505280.mp4",
"1091048782302720000.mp4",
"1091048895271890944.mp4",
"1091069991098691584.mp4",
"1091083241534177280.mp4",
"1091085478310326272.mp4",
"1094085212620857344.mp4",
"1095868645537660928.mp4",
"1096504166748221440.mp4",
"1096873038932324352.mp4",
"1097664722628743168.mp4",
"1101967710835081216.mp4",
"1110660025753464832.mp4",
"1114630591703588864.mp4",
"1116111005714407424.mp4",
"1116443860722216960.mp4",
"1121196629995757568.mp4",
"1124790864036937728.mp4",
"1133261999812300800.mp4",
"1133935783426813952.mp4",
"1133955997380370432.mp4",
"1133972325675753472.mp4",
"1133979578382340096.mp4",
"1135041130250240000.mp4",
"1140076513845239808.mp4",
"1143761385604395008.mp4",
"1147314207537475584.mp4",
"1147374540792406016.mp4",
"1149448821663739904.mp4",
"1165111183569838080.mp4",
"1169505845705613312.mp4",
"1176737401784123392.mp4",
"1177784153018912768.mp4",
"1178137033974386688.mp4",
"1180325589010337792.mp4",
"1187213020712390656.mp4",
"1188238860266913792.mp4",
"1190608665925812224.mp4",
"1191810572225077248.mp4",
"1231332402262433792.mp4",
"1279238145321906176.mp4",
"1295897900823805952.mp4",
"1298885472571293696.mp4",
"1298887117837107200.mp4",
"1298891268759425024.mp4",
"944395338108780544.mp4",
"947215024257155072.mp4",
"950588614461517824.mp4",
"950888148634886144.mp4",
"951219255045681152.mp4",
"956641980211953664.mp4",
"962014169299869696.mp4",
"962014304108957696.mp4",
"962014593339867136.mp4",
"962014611627020288.mp4",
"962014996408254464.mp4",
"962015327036846080.mp4",
"962017115567702016.mp4",
"962752927913648128.mp4",
"963775397034647552.mp4",
"966397657096560640.mp4",
"966428278342955008.mp4",
"966428366574108672.mp4",
"966428668287193088.mp4",
"966429185021214720.mp4",
"973688088234795008.mp4",
"973689639003361280.mp4",
"973692035951677440.mp4",
"989973831039012864.mp4",
"989974204089040896.mp4",
"989974338809823232.mp4",
"990021559018561536.mp4",
"990041551185338368.mp4"
]
}
93 changes: 93 additions & 0 deletions scrapers_node/csv_parser.js
@@ -0,0 +1,93 @@
/*
This script parses the tweets.csv file into a usable tweets.json file.
CSV LAYOUT IS AS FOLLOWS:
[0] tweet_id,
[1] in_reply_to_status_id,
[2] in_reply_to_user_id,
[3] timestamp,
[4] source,
[5] text,
[6] retweeted_status_id,
[7] retweeted_status_user_id,
[8] retweeted_status_timestamp,
[9] expanded_urls
*/

var fs = require('fs');
var parse = require('csv-parse');
var async = require('async');

var inputFile = 'tweets.csv';

// Accumulates one entry per parsed CSV row; serialised to tweets.json at the end.
var resultsJson = {
  tweets: []
};

/*
Buffering parser: csv-parse hands the callback every row at once, each row
being an array of column strings in the order given in the header comment.
*/
var parser = parse({delimiter: ','}, function (err, data) {

  // A failed parse leaves nothing usable to convert, so stop here instead of
  // going on to write a tweets.json built from missing data.
  if (err) {
    console.log('Err ' + err);
    return;
  }

  // use async to run through each line sequentially
  async.eachSeries(data, function (line, callback) {

    // expanded_urls holds zero or more comma-separated URLs. split() on a
    // value without a comma yields a one-element array, so a single call
    // covers both the single-URL and the multi-URL case.
    // The "expanded_urls" comparison skips the column title itself.
    // NOTE(review): if tweets.csv starts with a header row, that row is still
    // pushed below as an entry of `tweets` (with empty expanded_urls) — confirm
    // whether that is intended.
    var urls = [];
    if (line[9].length > 0 && line[9] !== "expanded_urls") {
      urls = line[9].split(',');
    }

    // create a tweet object with all relevant data and store it
    resultsJson.tweets.push({
      tweet_id: line[0],
      in_reply_to_status_id: line[1],
      in_reply_to_user_id: line[2],
      timestamp: line[3],
      source: decodeURI(line[4]),
      text: line[5],
      retweeted_status_id: line[6],
      retweeted_status_user_id: line[7],
      retweeted_status_timestamp: line[8],
      expanded_urls: urls
    });

    callback();

  }, function (err) {

    // Do not write a partial result if the iteration reported a failure.
    if (err) {
      console.log('Err ' + err);
      return;
    }

    console.log('Saving Json!');

    // convert the resultsJson object to a pretty-printed string
    var json = JSON.stringify(resultsJson, null, 2);

    // write the file to disk
    fs.writeFile('./tweets.json', json, function (err) {
      // Report success only when the write actually succeeded.
      if (err) {
        console.log('Err ' + err);
        return;
      }
      console.log('Successfully created tweets.json');
    });

  });
});

// Stream the CSV from disk into the parser; its callback above runs once the
// whole file has been consumed.
fs.createReadStream(inputFile).pipe(parser);
31 changes: 31 additions & 0 deletions scrapers_node/vine_parser.js
@@ -0,0 +1,31 @@
/*
This quick and dirty script lists the files in the images and videos folders of the
Vine archive and creates a standalone json file (vine_data.json) containing the filenames
*/

var fs = require('fs');

// Locations of the archive's media folders, relative to the working directory.
const imageFolder = './VINE-WH-archive_1421922769494487040/images/';
const videosFolder = './VINE-WH-archive_1421922769494487040/videos/';

// The directory listings are used as-is: every entry name found in each
// folder ends up in the corresponding array.
var resultsJson = {
  images: fs.readdirSync(imageFolder),
  videos: fs.readdirSync(videosFolder)
};

// convert the resultsJson object to a pretty-printed string
var json = JSON.stringify(resultsJson, null, 2);

console.log("writing json file");

// write the file to disk
fs.writeFile('./vine_data.json', json, function (err) {
  // Report success only when the write actually succeeded.
  if (err) {
    console.log('Err ' + err);
    return;
  }
  console.log('Successfully created vine_data.json');
});

0 comments on commit 25b8486

Please sign in to comment.