Skip to content

Commit

Permalink
Move scrapy to its own module and add patch to fix broken permission …
Browse files Browse the repository at this point in the history
…code.

Scrapy is usually installed via pip, where copying all file permissions
makes sense. In Nix, however, the copied files are owned by root and
read-only. As a consequence scrapy can't edit the project templates, so

  scrapy startproject

fails.
  • Loading branch information
teh committed Feb 16, 2017
1 parent c10b0e7 commit 69363e9
Show file tree
Hide file tree
Showing 3 changed files with 67 additions and 28 deletions.
38 changes: 38 additions & 0 deletions pkgs/development/python-modules/scrapy/default.nix
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Standalone Nix expression for Scrapy (moved out of python-packages.nix).
# Builds the PyPI sdist and applies a local patch so that
# `scrapy startproject` works from a read-only Nix store path.
{ buildPythonPackage, fetchurl, glibcLocales, mock, pytest, botocore,
testfixtures, pillow, six, twisted, w3lib, lxml, queuelib, pyopenssl,
service-identity, parsel, pydispatcher, cssselect, lib }:
buildPythonPackage rec {
name = "Scrapy-${version}";
version = "1.3.1";

# Build/test-only dependencies; glibcLocales supplies the en_US.UTF-8
# locale data needed by the LC_ALL setting below.
buildInputs = [ glibcLocales mock pytest botocore testfixtures pillow ];
# Runtime Python dependencies propagated to dependent packages.
propagatedBuildInputs = [
six twisted w3lib lxml cssselect queuelib pyopenssl service-identity parsel pydispatcher
];

# Scrapy is usually installed via pip, where copying all file
# permissions makes sense. In Nix the copied files are owned by
# root and read-only, so scrapy cannot edit the project templates.
# The patch switches startproject from copy2/copystat to copyfile.
patches = [ ./permissions-fix.patch ];

# NOTE(review): presumably the test suite needs a UTF-8 locale for
# non-ASCII handling — glibcLocales above provides this locale.
LC_ALL="en_US.UTF-8";

checkPhase = ''
py.test --ignore=tests/test_linkextractors_deprecated.py --ignore=tests/test_proxy_connect.py
# The ignored tests require mitmproxy, which depends on protobuf, but it's disabled on Python3
'';

# Fetch the sdist from the PyPI mirror; `name` expands to Scrapy-1.3.1.
src = fetchurl {
url = "mirror://pypi/S/Scrapy/${name}.tar.gz";
sha256 = "0s5qkxwfq842maxjd2j82ldp4dyb70kla3z5rr56z0p7ig53cbvk";
};

meta = with lib; {
description = "A fast high-level web crawling and web scraping framework, used to crawl websites and extract structured data from their pages";
homepage = "http://scrapy.org/";
license = licenses.bsd3;
maintainers = with maintainers; [ drewkett ];
platforms = platforms.linux;
};
}
28 changes: 28 additions & 0 deletions pkgs/development/python-modules/scrapy/permissions-fix.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
diff --git a/scrapy/commands/startproject.py b/scrapy/commands/startproject.py
index 5941066..89f8edb 100644
--- a/scrapy/commands/startproject.py
+++ b/scrapy/commands/startproject.py
@@ -4,7 +4,7 @@ import os
import string
from importlib import import_module
from os.path import join, exists, abspath
-from shutil import ignore_patterns, move, copy2, copystat
+from shutil import ignore_patterns, move, copyfile, copystat

import scrapy
from scrapy.commands import ScrapyCommand
@@ -76,8 +76,7 @@ class Command(ScrapyCommand):
if os.path.isdir(srcname):
self._copytree(srcname, dstname)
else:
- copy2(srcname, dstname)
- copystat(src, dst)
+ copyfile(srcname, dstname)

def run(self, args, opts):
if len(args) not in (1, 2):
@@ -118,4 +117,3 @@ class Command(ScrapyCommand):
_templates_base_dir = self.settings['TEMPLATES_DIR'] or \
join(scrapy.__path__[0], 'templates')
return join(_templates_base_dir, 'project')
-
29 changes: 1 addition & 28 deletions pkgs/top-level/python-packages.nix
Original file line number Diff line number Diff line change
Expand Up @@ -31002,35 +31002,8 @@ EOF
};
};

scrapy = buildPythonPackage rec {
name = "Scrapy-${version}";
version = "1.3.1";

buildInputs = with self; [ pkgs.glibcLocales mock pytest botocore testfixtures pillow ];
propagatedBuildInputs = with self; [
six twisted w3lib lxml cssselect queuelib pyopenssl service-identity parsel pydispatcher
];

LC_ALL="en_US.UTF-8";

checkPhase = ''
py.test --ignore=tests/test_linkextractors_deprecated.py --ignore=tests/test_proxy_connect.py
# The ignored tests require mitmproxy, which depends on protobuf, but it's disabled on Python3
'';
scrapy = callPackage ../development/python-modules/scrapy { };

src = pkgs.fetchurl {
url = "mirror://pypi/S/Scrapy/${name}.tar.gz";
sha256 = "0s5qkxwfq842maxjd2j82ldp4dyb70kla3z5rr56z0p7ig53cbvk";
};

meta = {
description = "A fast high-level web crawling and web scraping framework, used to crawl websites and extract structured data from their pages";
homepage = "http://scrapy.org/";
license = licenses.bsd3;
maintainers = with maintainers; [ drewkett ];
platforms = platforms.linux;
};
};
pandocfilters = buildPythonPackage rec{
version = "1.4.1";
pname = "pandocfilters";
Expand Down

0 comments on commit 69363e9

Please sign in to comment.