-
Notifications
You must be signed in to change notification settings - Fork 67
Docker restarts infinitely: "Failed to fork child process" #10
Comments
After updating to latest docker image it is working again for me - if anybody has done something on purpose to fix this: Thanks :-) |
I'm sorry to post on a closed issue, but this issue seems to still be here or to have returned since OP commented that an update fixed his situation.
[
{
"Id": "38ea21ae35b6214d779a1b23208d8a75bc435ff8315dbb57e20d02498bc0333a",
"Created": "2017-06-04T07:52:49.45273884Z",
"Path": "/bin/sh",
"Args": [
"-c",
"bash start-libreoffice.sh"
],
"State": {
"Status": "exited",
"Running": false,
"Paused": false,
"Restarting": false,
"OOMKilled": false,
"Dead": false,
"Pid": 0,
"ExitCode": 137,
"Error": "",
"StartedAt": "2017-06-04T07:57:17.788074925Z",
"FinishedAt": "2017-06-04T12:03:22.069898922Z"
},
"Image": "sha256:d3757c5469234839e2d58dfaee49cc8466d61f87074961391986e861c6dbf14f",
"ResolvConfPath": "/var/lib/docker/containers/38ea21ae35b6214d779a1b23208d8a75bc435ff8315dbb57e20d02498bc0333a/resolv.conf",
"HostnamePath": "/var/lib/docker/containers/38ea21ae35b6214d779a1b23208d8a75bc435ff8315dbb57e20d02498bc0333a/hostname",
"HostsPath": "/var/lib/docker/containers/38ea21ae35b6214d779a1b23208d8a75bc435ff8315dbb57e20d02498bc0333a/hosts",
"LogPath": "/var/lib/docker/containers/38ea21ae35b6214d779a1b23208d8a75bc435ff8315dbb57e20d02498bc0333a/38ea21ae35b6214d779a1b23208d8a75bc435ff8315dbb57e20d02498bc0333a-json.log",
"Name": "/wizardly_borg",
"RestartCount": 5,
"Driver": "overlay2",
"MountLabel": "",
"ProcessLabel": "",
"AppArmorProfile": "",
"ExecIDs": null,
"HostConfig": {
"Binds": null,
"ContainerIDFile": "",
"LogConfig": {
"Type": "json-file",
"Config": {}
},
"NetworkMode": "default",
"PortBindings": {
"9980/tcp": [
{
"HostIp": "127.0.0.1",
"HostPort": "9980"
}
]
},
"RestartPolicy": {
"Name": "always",
"MaximumRetryCount": 0
},
"AutoRemove": false,
"VolumeDriver": "",
"VolumesFrom": null,
"CapAdd": [
"MKNOD"
],
"CapDrop": null,
"Dns": [],
"DnsOptions": [],
"DnsSearch": [],
"ExtraHosts": null,
"GroupAdd": null,
"IpcMode": "",
"Cgroup": "",
"Links": null,
"OomScoreAdj": 0,
"PidMode": "",
"Privileged": false,
"PublishAllPorts": false,
"ReadonlyRootfs": false,
"SecurityOpt": null,
"UTSMode": "",
"UsernsMode": "",
"ShmSize": 67108864,
"Runtime": "runc",
"ConsoleSize": [
0,
0
],
"Isolation": "",
"CpuShares": 0,
"Memory": 0,
"NanoCpus": 0,
"CgroupParent": "",
"BlkioWeight": 0,
"BlkioWeightDevice": null,
"BlkioDeviceReadBps": null,
"BlkioDeviceWriteBps": null,
"BlkioDeviceReadIOps": null,
"BlkioDeviceWriteIOps": null,
"CpuPeriod": 0,
"CpuQuota": 0,
"CpuRealtimePeriod": 0,
"CpuRealtimeRuntime": 0,
"CpusetCpus": "",
"CpusetMems": "",
"Devices": [],
"DeviceCgroupRules": null,
"DiskQuota": 0,
"KernelMemory": 0,
"MemoryReservation": 0,
"MemorySwap": 0,
"MemorySwappiness": -1,
"OomKillDisable": false,
"PidsLimit": 0,
"Ulimits": null,
"CpuCount": 0,
"CpuPercent": 0,
"IOMaximumIOps": 0,
"IOMaximumBandwidth": 0
},
"GraphDriver": {
"Data": {
"LowerDir": "/var/lib/docker/overlay2/a6e3472d865a223330d01c5b078369f206439bea8c8f99e1dc600ca483ffaee0-init/diff:/var/lib/docker/overlay2/b14335218e66a1ea8940ef671bec2f7c6f7030a9f184765af6d9f60ba6c13e94/diff:/var/lib/docker/overlay2/ad5642adbe8566f711fcec95ae4cab5e62ce34cc1df63d108940985415cd149e/diff:/var/lib/docker/overlay2/a30640e1e0bc41324e49d69e0181245bdf94f15dea4520841cddf3fba9c0ccb6/diff:/var/lib/docker/overlay2/24f88ff1146028958a4b0c9467b02afaf052d239616e45a8e808f840e4781113/diff:/var/lib/docker/overlay2/411c16332e95176fb553363ff8248db4c8e5a3464f757ba63726b7f9f92db4e9/diff:/var/lib/docker/overlay2/685233aa866ef5afa2823e5c3d4512df097d59dd9d0bde8253f441735e37535a/diff:/var/lib/docker/overlay2/752fb968056ab7eca54814d6f09a0fe7a44fd37e7be8774cd42fca79733d3b4d/diff:/var/lib/docker/overlay2/d886180b45b8e08fe76f25db152f30e5f2a9af75aaae5f33634881ade9af904a/diff",
"MergedDir": "/var/lib/docker/overlay2/a6e3472d865a223330d01c5b078369f206439bea8c8f99e1dc600ca483ffaee0/merged",
"UpperDir": "/var/lib/docker/overlay2/a6e3472d865a223330d01c5b078369f206439bea8c8f99e1dc600ca483ffaee0/diff",
"WorkDir": "/var/lib/docker/overlay2/a6e3472d865a223330d01c5b078369f206439bea8c8f99e1dc600ca483ffaee0/work"
},
"Name": "overlay2"
},
"Mounts": [],
"Config": {
"Hostname": "38ea21ae35b6",
"Domainname": "",
"User": "",
"AttachStdin": false,
"AttachStdout": false,
"AttachStderr": false,
"ExposedPorts": {
"9980/tcp": {}
},
"Tty": true,
"OpenStdin": false,
"StdinOnce": false,
"Env": [
"domain=subdomain\\.domain\\.org",
"PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
"LC_CTYPE=en_US.UTF-8"
],
"Cmd": [
"/bin/sh",
"-c",
"bash start-libreoffice.sh"
],
"ArgsEscaped": true,
"Image": "collabora/code",
"Volumes": null,
"WorkingDir": "",
"Entrypoint": null,
"OnBuild": null,
"Labels": {}
},
"NetworkSettings": {
"Bridge": "",
"SandboxID": "ffeaab1f03863e71fcb167480dd9c100cb698557378f991972915b0a3fef004d",
"HairpinMode": false,
"LinkLocalIPv6Address": "",
"LinkLocalIPv6PrefixLen": 0,
"Ports": {},
"SandboxKey": "/var/run/docker/netns/ffeaab1f0386",
"SecondaryIPAddresses": null,
"SecondaryIPv6Addresses": null,
"EndpointID": "",
"Gateway": "",
"GlobalIPv6Address": "",
"GlobalIPv6PrefixLen": 0,
"IPAddress": "",
"IPPrefixLen": 0,
"IPv6Gateway": "",
"MacAddress": "",
"Networks": {
"bridge": {
"IPAMConfig": null,
"Links": null,
"Aliases": null,
"NetworkID": "e55ad2c38cb147a8209e284ef5dcb5078a23641207cd25f490900f3890fbf328",
"EndpointID": "",
"Gateway": "",
"IPAddress": "",
"IPPrefixLen": 0,
"IPv6Gateway": "",
"GlobalIPv6Address": "",
"GlobalIPv6PrefixLen": 0,
"MacAddress": ""
}
}
}
}
]
I'm available to provide any extra info or perform test if needed. |
this still appears broken, docker image just keeps restarting for me too. |
same here |
I'm reopening this, as this still seems to affect many people. In fact, I've not dared to restart my own docker since it is currently running and there are so many reports of it not working for others that I don't want to take any risks. You might try to restart it over and over again and see if it is some kind of timing / race-condition where it might work 1 out of 20 times or something... Again, I've not changed any of my setup and it suddenly worked after trying countless times... |
Status : Waiting: CrashLoopBackOff :D it's not 1/20 :D 0/210 for me
|
There's a 50 s time limit for the preinit stage during startup, it might be it fails to complete the process during that period on certain systems. |
Yup, it can be that :) |
Where's that timeout defined? Do we have any chance to change that to something a little higher? |
From the look of my log messages, the timeout is visible, but doesn't appear to be the cause of the issue. Allow me to elaborate:
This is one full cycle and part of a second cycle of the init phase in the log. |
Yes, looking at my own logs, I'd agree with you that the 50s timeout is between the two attempts and doesn't seem to be the cause of the issue, but rather one symptom. So it's still unclear why the forking of the child processes fails so often (or even always for some users). |
I got the same error. The solution is not to use /etc/default/docker for configuring the devicemapper - use systemd for this instead. The solution mentioned in this post -> https://help.nextcloud.com/t/collabora-docker-capabilities-problem/4774 did solve my problem. |
I have the same issue and using overlay2 on Arch Linux. Anyone solved this without switching to devicemapper? |
I have 17 running services on my server, is there any method to keep aufs method and not devicemapper? |
The timeout was increased in version 2.1.5, give it a try to see if that helps. |
I have just tested with version 2.1.5, the problem is still not resolved. It just takes longer time to crash but the docker image is not functional as the web app is not responding while waiting:
|
Still happening for me as well. |
Hi, I found the issue on all my debian setups with docker with versions from 2.1.2 to 3.0.0.0. For docker I tried the overlay2 and overlay storage drivers. The fact that I also find the issue with the debian package makes me think the error does not come from docker but from something higher, like a system library or a kernel issue. Can someone else confirm that both the package install and docker fail on the same machine? |
The problem is in the CollaboraOnline code |
#10 (comment): this seems to be a different issue, the required capabilities are missing. #10 (comment), #10 (comment): I've had no issues with aufs and overlay2 storage drivers on Ubuntu 17.04 / 17.10. There can be different cases, and in others the Collabora Online log in the container should give further errors, eg. this and this are both unrelated issues, but lead to "FTL Failed to fork child process". |
First off I would like to say I am having the same or at least very similar issues with the Docker Container and it restarting constantly. I am running it on DigitalOcean on ubuntu 16.04 and it is the only installed program beyond the base system and docker requirements. Second, issues like this are the precise reason that open source projects either don't get wide adoption or outright fail! This issue has been open for almost a year and is still not fixed! I'd be more than happy to pay for Collabora Code IF they could get it working and stable but I really don't see that happening. IDK maybe I'm just old and angry but when you are shipping a product you don't just update it without testing and then push this update out and after people have stated there are major or critical issues you roll it back to the previous version not just leave it on the broken one. Collabora really needs to get their S together! |
Okay; a probable fix. Can somebody try passing this environment (SLEEPFORDEBUGGER=0) and see if it works. You can pass it like If it doesn't fix the issue, trace level logs (by changing the entry from warning to trace in /etc/loolwsd/loolwsd.xml; also consider changing the flush attribute in to true) might help. |
With this variable, the container has stopped restarting continuously but it get stuck during the initialisation process:
Of course, the server is not started and nothing is answering to the https requests. |
Here is a log in "trace" mode: https://framabin.org/?4fe8437720b162a9#O/H659YpD9U/BAMgKRxQSXHN6DIVl2Wfpth8wlOHVaU= |
I don't know why capabilities are not set on loolforkit binary in your docker container. As mentioned in the error message, it may depend on your kernel boot options, etc. |
And you didn't set the flush to true in logging, I think. Did you? Anyways, trace level logs are irrelevant here since it's clear what the problem is -- you need to figure out why capabilities are not set in the first place. |
Hi, |
I can confirm the flush logging was set to true. I am able to run the docker image in a CentOS 7.4 host but I cannot launch it in a Debian 9.3. The docker command lines are identical: I have used The kernels used and docker configurations are the default ones for each distribution. I do not understand why the capabilities are not set when run on a Debian host. |
I'm using debian 9.3 kernel 4.12, overlay2 and it does not work even with capabilities enabled. |
@jribal can you paste your trace logs as well? |
Hi, here it is. |
@jribal At least, this is not a "Failed to fork child process" problem for you anymore. Please open a separate ticket. Reading the logs, everything looks fine to me as well. And a request to wsd to open a document should work if you give it any; I cannot see you are making any request to wsd for opening the document, etc. Did you try opening the document? Are you sure the request really reached wsd (by checking if something is printed in the logs)? |
You are right. Yes I can reach the container but I can't access admin page or file opening. Either from internet or local I get several I will search or open a new issue, it seems like my issue is not the same as @asirinelli. EDIT : It is indeed another issue. I did not pay close attention to the default configuration. It always requires https. I'm using traefik so no need for it. In /etc/loolwsd/loolwsd.xml I changed "SSL settings" enable to false and connection via proxy... to true. |
I found an interesting post here https://forum.synology.com/enu/viewtopic.php?t=132977 |
Installing seccomp fixed it for me, too. Ubuntu 16.04, 4.13.0-32-generic, Docker version 17.12.0-ce, build c97c6d6 |
@pranavk |
\o/ |
@returntrip which version is that? 3.0 has increased timeout already. |
@thebearon I am using the latest docker image which should be CODE 3.0.0.5 |
CODE 3.1.0 on docker 1.13.1 on CentOS 7.4, I've resolved the issue changing storage-driver from overlay2 to devicemapper |
Just to notify, this is still present. I have Collabora 3.2.2 (git hash: ffc419a) from docker, running on Debian testing (Buster). Kernel is 4.13.0-1-amd64.
I get I can provide additional info and logs if you need. |
I too am getting a similar issue.
Both the I too can provide any additional logs and information if needed. |
@pranavk thanks, your suggested workaround worked for me:
|
I had the same issue, but it went away when I changed the password env variable from my complex password (-e "password=DFlkj$shnk&") to a simple string of letters (-e "password=admin"). Changing back to the complex password makes the error return. Strange |
@danteali could you please share with us what happens in this scenario:
Thanks |
That was one of the things I tried while troubleshooting since I found the suggestion in another issue. It made no discernible difference. Same errors. |
Got the same problem here.
Here's the final part of the log:
The last two lines come up when I try to access the domain on the port nginx controls for the reverse proxy. |
This is still a problem w/ latest docker image 4.0.0.2 running on Ubuntu Server 18.04 and docker 18.09.1
|
I got this issue when I moved the All docker operations are slower. This is cheaper, but broke my collabora/code. Adding Maybe this is linked to a performance issue. Can I suggest to write in the docs where are the sensible directories which could be mounted on a faster disk ? |
As far as I could trace the bug, at least some of you are running Collabora on an NFS filesystem, which does not support capabilities. You can turn them off by passing --o:security.capabilities=false to loolwsd; that disables the capabilities check so you can run it. |
Reproducible:
Always
Steps to reproduce:
Expected result:
Docker should start up fine
Actual result:
Docker will restart over and over (filling up HDD space)
Logs:
docker -l debug logs -f DOCKERID
System information
uname -a
free -m
This is more or less a cross-post from https://help.nextcloud.com/t/restarting-docker-results-in-failed-to-fork-child-processes/12209
The text was updated successfully, but these errors were encountered: