
Commit

Merge pull request #2 from aggrolite/master
More cleanup
Matthew Dees committed Feb 19, 2012
2 parents af55f64 + 3e80ce6 commit 2c1dea7
Showing 1 changed file with 17 additions and 57 deletions.
74 changes: 17 additions & 57 deletions reddit_img_snarfer.pl
@@ -2,26 +2,26 @@
 # Reddit Image Snarfer
 # Copyright 2011 (c) Matt Dees
 # Distributed under the 2-clause BSD.
 use strict;
 use warnings;
 
-#use Data::Dumper;
+use Data::Dumper;
 use HTTP::Tiny ();
 use JSON::XS ();
-use Image::Info ();
 use File::Path qw(make_path);
 
+my $http = HTTP::Tiny->new;
+
 my @subreddits = qw/ EarthPorn VillagePorn /;
-my $save_dir = "$ENV{HOME}/Pictures/RedditTest";
+my $root_dir = "$ENV{HOME}/Pictures/Snarfer";
 my $number_of_pages = 10;
 
-make_path($save_dir); #like mkdir -p
-
 map { load_subreddit($_) } @subreddits;
 
 sub load_subreddit {
-    my ($subreddit) = @_;
+    my $subreddit = shift;
     print "\nProcessing /r/$subreddit\n---\n";
-    my $res = HTTP::Tiny->new->get("http://www.reddit.com/r/$subreddit/top.json?sort=top&t=all");
+    my $res = $http->get("http://www.reddit.com/r/$subreddit/top.json?sort=top&t=all");
     if ( $res->{'status'} != 200 ) {
         return 'non-200 response recieved';
     }
@@ -50,60 +50,20 @@ sub load_subreddit {
 
     foreach my $link (@links) {
         my $url = $link->{'data'}->{'url'};
-        my $name = $link->{'data'}->{'title'};
-        download_image( $url, $name )
-          unless ( $url !~ m@imgur\.com@i && $url !~ m@(png|jpg|jpeg)$@i );
+        download_image( $url, $subreddit )
+          unless ( $url !~ m@imgur\.com\/[a-z]+\.(png|jpg|gif)$@i );
     }
 }
 
 sub download_image {
-    my ( $img_url, $name ) = @_;
-    print "Downloading $img_url...\n";
-    my $img_ref = HTTP::Tiny->new->get($img_url);
-
-    # try grabbing url .png and .jpg incase the first download returns a page
-    if ( $img_url =~ /imgur.com/ && $img_ref->{'headers'}->{'content-type'} =~ /text\/html/ ) {
-        ( $img_ref, $img_url ) = try_extensions_on_imgur($img_url);
-        if ( !$img_ref ) {
-            print "Failure: image could not be downloaded.\n";
-            return;
-        }
-    }
-    if ( $img_ref->{'status'} != 200 ) {
-        print "Failure: image returned http status code " . $img_ref->{'status'} . "\n";
-        return;
-    }
-    process_img( $img_ref->{'content'}, $img_url, $name );
-
-    # print Dumper $img_ref;
-}
-
-sub process_img {
-    my ( $img_file_contents, $img_url ) = @_;
-    my $name = $img_url;
-    $name =~ s/^(.+\/){1,}(.+)$/$2/;
-    # my ($extension) = $img_url =~ /\.([a-zA-Z]{3,4})$/;
-    my $image_filename = "$save_dir/$name";
-    $image_filename =~ s/\.([a-zA-Z]{3,4})//;
-    my $img_type = Image::Info::image_type(\$img_file_contents)->{'file_type'};
-    print "Determined file type to be $img_type.\n";
-    if ( $img_type eq 'JPEG' ) {
-        $image_filename .= '.jpg';
-    }
-    elsif ( $img_type eq 'PNG' ) {
-        $image_filename .= '.png';
-    }
-    elsif ( $img_type eq 'GIF') {
-        $image_filename .= '.gif';
-    }
-    else {
-        print "File is not a valid image skipping.\n";
-        return;
-    }
-    print "Saving to $image_filename\n";
-    open( my $img_file_fh, '>', $image_filename ) || print "FAILED Opening File for Writing: $!\n";
-    print $img_file_fh $img_file_contents;
-    close $img_file_fh || die $!;
+    my ( $url, $sub ) = @_;
+    print "Downloading $url...";
+    my ($name) = ( $url =~ m@imgur\.com/([a-z]+\.[a-z]{3})@i );
+    my $path = "$root_dir/$sub";
+    make_path $path;
+    my $dl = $http->mirror( $url, "$path/$name" );
+    print $dl->{success} ? "OK\n" : "FAILED\n";
 }
 
 sub try_extensions_on_imgur {
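For context on the change: the new download_image() drops the old fetch-then-inspect flow (a plain HTTP::Tiny get, Image::Info type detection, and a hand-rolled file write in process_img) and instead takes the file name straight from the imgur URL, creates a per-subreddit directory under $root_dir, and lets HTTP::Tiny's mirror() write the response to disk. Below is a minimal standalone sketch of that mirror-based download; the example URL and target directory are illustrative only, not taken from the commit.

#!/usr/bin/perl
# Standalone sketch of the mirror-based download used by the new download_image().
# The URL and directory below are examples only.
use strict;
use warnings;
use HTTP::Tiny ();
use File::Path qw(make_path);

my $http = HTTP::Tiny->new;
my $url  = 'http://i.imgur.com/abcdefg.jpg';    # hypothetical direct image link

# Take the local file name from the URL, as download_image() now does.
my ($name) = ( $url =~ m@imgur\.com/([a-z]+\.[a-z]{3})@i );
die "not a direct imgur image URL\n" unless $name;

my $dir = "$ENV{HOME}/Pictures/Snarfer/EarthPorn";    # per-subreddit directory
make_path($dir);                                      # like mkdir -p

# mirror() streams the body to disk and sends If-Modified-Since when the
# file already exists, so images are not re-downloaded on later runs.
my $res = $http->mirror( $url, "$dir/$name" );
print $res->{success} ? "OK\n" : "FAILED: $res->{status} $res->{reason}\n";

Because load_subreddit() now only passes URLs matching m@imgur\.com\/[a-z]+\.(png|jpg|gif)$@i, every download is a direct image link, so download_image() no longer needs the content-type sniffing or the try_extensions_on_imgur() fallback.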
