Skip to content

Commit

Permalink
Move scrapy to its own module and add patch to fix broken permission …
Browse files Browse the repository at this point in the history
…code.

Scrapy is usually installed via pip, where copying all file permissions
makes sense. In Nix, however, the copied files are owned by root and
read-only. As a consequence scrapy can't edit the project templates, so

  scrapy startproject

fails.
  • Loading branch information
teh committed Feb 16, 2017
1 parent c10b0e7 commit 69363e9
Show file tree
Hide file tree
Showing 3 changed files with 67 additions and 28 deletions.
38 changes: 38 additions & 0 deletions pkgs/development/python-modules/scrapy/default.nix
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Standalone Nix expression for Scrapy (moved out of python-packages.nix).
# Builds the PyPI sdist and applies a local patch so that
# `scrapy startproject` works from a read-only Nix store path.
{ buildPythonPackage, fetchurl, glibcLocales, mock, pytest, botocore,
testfixtures, pillow, six, twisted, w3lib, lxml, queuelib, pyopenssl,
service-identity, parsel, pydispatcher, cssselect, lib }:
buildPythonPackage rec {
name = "Scrapy-${version}";
version = "1.3.1";

# Build/test-only dependencies; glibcLocales supplies the en_US.UTF-8
# locale data needed by the LC_ALL setting below.
buildInputs = [ glibcLocales mock pytest botocore testfixtures pillow ];
# Runtime Python dependencies propagated to dependent packages.
propagatedBuildInputs = [
six twisted w3lib lxml cssselect queuelib pyopenssl service-identity parsel pydispatcher
];

# Scrapy is usually installed via pip, where copying all file
# permissions makes sense. In Nix the copied files are owned by
# root and read-only, so scrapy cannot edit the project templates.
# The patch switches startproject from copy2/copystat to copyfile.
patches = [ ./permissions-fix.patch ];

# NOTE(review): presumably the test suite needs a UTF-8 locale for
# non-ASCII handling — glibcLocales above provides this locale.
LC_ALL="en_US.UTF-8";

checkPhase = ''
py.test --ignore=tests/test_linkextractors_deprecated.py --ignore=tests/test_proxy_connect.py
# The ignored tests require mitmproxy, which depends on protobuf, but it's disabled on Python3
'';

# Fetch the sdist from the PyPI mirror; `name` expands to Scrapy-1.3.1.
src = fetchurl {
url = "mirror://pypi/S/Scrapy/${name}.tar.gz";
sha256 = "0s5qkxwfq842maxjd2j82ldp4dyb70kla3z5rr56z0p7ig53cbvk";
};

meta = with lib; {
description = "A fast high-level web crawling and web scraping framework, used to crawl websites and extract structured data from their pages";
homepage = "http://scrapy.org/";
license = licenses.bsd3;
maintainers = with maintainers; [ drewkett ];
platforms = platforms.linux;
};
}
28 changes: 28 additions & 0 deletions pkgs/development/python-modules/scrapy/permissions-fix.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
diff --git a/scrapy/commands/startproject.py b/scrapy/commands/startproject.py
index 5941066..89f8edb 100644
--- a/scrapy/commands/startproject.py
+++ b/scrapy/commands/startproject.py
@@ -4,7 +4,7 @@ import os
import string
from importlib import import_module
from os.path import join, exists, abspath
-from shutil import ignore_patterns, move, copy2, copystat
+from shutil import ignore_patterns, move, copyfile, copystat

import scrapy
from scrapy.commands import ScrapyCommand
@@ -76,8 +76,7 @@ class Command(ScrapyCommand):
if os.path.isdir(srcname):
self._copytree(srcname, dstname)
else:
- copy2(srcname, dstname)
- copystat(src, dst)
+ copyfile(srcname, dstname)

def run(self, args, opts):
if len(args) not in (1, 2):
@@ -118,4 +117,3 @@ class Command(ScrapyCommand):
_templates_base_dir = self.settings['TEMPLATES_DIR'] or \
join(scrapy.__path__[0], 'templates')
return join(_templates_base_dir, 'project')
-
29 changes: 1 addition & 28 deletions pkgs/top-level/python-packages.nix
Original file line number Diff line number Diff line change
Expand Up @@ -31002,35 +31002,8 @@ EOF
};
};

scrapy = buildPythonPackage rec {
name = "Scrapy-${version}";
version = "1.3.1";

buildInputs = with self; [ pkgs.glibcLocales mock pytest botocore testfixtures pillow ];
propagatedBuildInputs = with self; [
six twisted w3lib lxml cssselect queuelib pyopenssl service-identity parsel pydispatcher
];

LC_ALL="en_US.UTF-8";

checkPhase = ''
py.test --ignore=tests/test_linkextractors_deprecated.py --ignore=tests/test_proxy_connect.py
# The ignored tests require mitmproxy, which depends on protobuf, but it's disabled on Python3
'';
scrapy = callPackage ../development/python-modules/scrapy { };

src = pkgs.fetchurl {
url = "mirror://pypi/S/Scrapy/${name}.tar.gz";
sha256 = "0s5qkxwfq842maxjd2j82ldp4dyb70kla3z5rr56z0p7ig53cbvk";
};

meta = {
description = "A fast high-level web crawling and web scraping framework, used to crawl websites and extract structured data from their pages";
homepage = "http://scrapy.org/";
license = licenses.bsd3;
maintainers = with maintainers; [ drewkett ];
platforms = platforms.linux;
};
};
pandocfilters = buildPythonPackage rec{
version = "1.4.1";
pname = "pandocfilters";
Expand Down

0 comments on commit 69363e9

Please sign in to comment.