Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.
Download ZIP
Browse files

Resurrect dead cells

  • Loading branch information...
commit 596b514f253ea77a36f0fa2a6010c3a02f2c77d1 1 parent 2368919
@cdrnet cdrnet authored
View
17 Source/Lokad.Cloud.AppHost.Framework/Commands/EnsureAllCellsAreRunningUnlessCancelledCommand.cs
@@ -0,0 +1,17 @@
+#region Copyright (c) Lokad 2011-2012
+// This code is released under the terms of the new BSD licence.
+// URL: http://www.lokad.com/
+#endregion
+
+using System;
+
+namespace Lokad.Cloud.AppHost.Framework.Commands
+{
+ /// <summary>
+ /// Watchdog command: asks the host to verify that every cell is still running and to restart any cell whose thread has died, unless that cell was cancelled.
+ /// </summary>
+ [Serializable]
+ public sealed class EnsureAllCellsAreRunningUnlessCancelledCommand : IHostCommand
+ {
+ }
+}
View
43 Source/Lokad.Cloud.AppHost.Framework/Instrumentation/Events/CellDeadRestartedEvent.cs
@@ -0,0 +1,43 @@
+#region Copyright (c) Lokad 2011-2012
+// This code is released under the terms of the new BSD licence.
+// URL: http://www.lokad.com/
+#endregion
+
+using System;
+using System.Xml.Linq;
+
+namespace Lokad.Cloud.AppHost.Framework.Instrumentation.Events
+{
+ [Serializable]
+ public class CellDeadRestartedEvent : IHostEvent // reported when a cell is found dead and is about to be started again
+ {
+ public HostEventLevel Level { get { return HostEventLevel.FatalError; } } // always reported at FatalError level
+ public HostLifeIdentity Host { get; private set; } // identity of the host on which the dead cell was found
+ public string CellName { get; private set; } // name of the cell that was found dead
+ public string SolutionName { get; private set; } // name of the solution the cell belongs to
+
+ public CellDeadRestartedEvent(HostLifeIdentity host, string cellName, string solutionName)
+ {
+ Host = host;
+ CellName = cellName;
+ SolutionName = solutionName;
+ }
+
+ public string Describe() // human-readable one-line message naming the cell, the solution and the host's worker name
+ {
+ return string.Format("AppHost: {0} cell of {1} solution was found dead on {2} and will be resurrected.",
+ CellName, SolutionName, Host.WorkerName);
+ }
+
+ public XElement DescribeMeta() // same data as XML: Meta contains Component, Event and AppHost; AppHost contains Host, Solution and Cell
+ {
+ return new XElement("Meta",
+ new XElement("Component", "Lokad.Cloud.AppHost"),
+ new XElement("Event", "CellDeadRestartedEvent"),
+ new XElement("AppHost",
+ new XElement("Host", Host.WorkerName),
+ new XElement("Solution", SolutionName),
+ new XElement("Cell", CellName)));
+ }
+ }
+}
View
2  Source/Lokad.Cloud.AppHost.Framework/Lokad.Cloud.AppHost.Framework.csproj
@@ -43,11 +43,13 @@
<ItemGroup>
<Compile Include="AssemblyData.cs" />
<Compile Include="CellLifeIdentity.cs" />
+ <Compile Include="Commands\EnsureAllCellsAreRunningUnlessCancelledCommand.cs" />
<Compile Include="Definition\AssembliesHead.cs" />
<Compile Include="Definition\CellDefinition.cs" />
<Compile Include="Definition\SolutionDefinition.cs" />
<Compile Include="Definition\SolutionHead.cs" />
<Compile Include="Instrumentation\Events\CellAbortedEvent.cs" />
+ <Compile Include="Instrumentation\Events\CellDeadRestartedEvent.cs" />
<Compile Include="Instrumentation\Events\NewUnrelatedSolutionDetectedEvent.cs" />
<Compile Include="Instrumentation\Events\NewDeploymentOfSolutionDetectedEvent.cs" />
<Compile Include="HostLifeIdentity.cs" />
View
33 Source/Lokad.Cloud.AppHost/Cell.cs
@@ -7,7 +7,6 @@
using System.Reflection;
using System.Runtime.Remoting;
using System.Threading;
-using System.Threading.Tasks;
using Lokad.Cloud.AppHost.Framework;
using Lokad.Cloud.AppHost.Framework.Definition;
using Lokad.Cloud.AppHost.Framework.Instrumentation.Events;
@@ -28,6 +27,7 @@ internal sealed class Cell
private readonly IHostContext _hostContext;
private readonly Action<IHostCommand> _sendCommand;
+ private volatile Thread _thread;
private volatile CellAppDomainEntryPoint _entryPoint;
private volatile CellDefinition _cellDefinition;
private volatile SolutionHead _deployment;
@@ -56,8 +56,6 @@ private Cell(IHostContext hostContext, Action<IHostCommand> sendCommand, CellDef
return process;
}
- public Task Task { get; private set; }
-
/// <summary>
/// Shutdown just this cell by requesting cancellation. There is no longer a Task property to wait on for the shutdown to complete.
/// </summary>
@@ -66,13 +64,28 @@ public void Cancel()
_cancellationTokenSource.Cancel();
}
+ /// <summary>
+ /// Watchdog check: unless cancellation was requested, restart this cell when its thread was never set or is no longer alive.
+ /// </summary>
+ public void EnsureIsRunningUnlessCancelled()
+ {
+ if (_cancellationTokenSource.Token.IsCancellationRequested)
+ {
+ return; // a cancelled cell is expected to be stopped; leave it alone
+ }
+
+ var thread = _thread; // read the volatile field once so both checks below look at the same thread
+ if (thread == null || !thread.IsAlive)
+ {
+ _hostContext.Observer.TryNotify(() => new CellDeadRestartedEvent(_hostContext.Identity, _cellName, _solutionName)); // report the dead cell before restarting it
+ Run(); // creates and starts a new thread, replacing _thread
+ }
+ }
+
void Run()
{
var cancellationToken = _cancellationTokenSource.Token;
- var completionSource = new TaskCompletionSource<object>();
- Task = completionSource.Task;
-
- var thread = new Thread(() =>
+ _thread = new Thread(() =>
{
var currentRoundStartTime = DateTimeOffset.UtcNow - FloodFrequencyThreshold;
while (!cancellationToken.IsCancellationRequested)
@@ -153,12 +166,10 @@ void Run()
AppDomain.Unload(domain);
}
}
-
- completionSource.TrySetCanceled();
});
- thread.Name = "Lokad.Cloud AppHost Cell (" +_cellName + ")";
- thread.Start();
+ _thread.Name = "Lokad.Cloud AppHost Cell (" + _cellName + ")";
+ _thread.Start();
}
public void OnCellDefinitionChanged(CellDefinition newCellDefinition, SolutionHead newDeployment)
View
24 Source/Lokad.Cloud.AppHost/Host.cs
@@ -25,6 +25,8 @@ public sealed class Host
private readonly int _autoLoadHeadDeploymentIntervalMs;
private readonly Timer _autoLoadHeadDeploymentTimer;
+ private const int _watchdogIntervalMs = 15000;
+ private readonly Timer _watchdogTimer;
private string _currentDeploymentEtag;
private SolutionHead _currentDeployment;
@@ -39,6 +41,7 @@ public Host(IHostContext context, int autoLoadHeadDeploymentIntervalMs = 30000)
_autoLoadHeadDeploymentIntervalMs = autoLoadHeadDeploymentIntervalMs;
_autoLoadHeadDeploymentTimer = new Timer(o => _commandQueue.Add(new LoadCurrentHeadDeploymentCommand()), null, Timeout.Infinite, _autoLoadHeadDeploymentIntervalMs);
+ _watchdogTimer = new Timer(o => _commandQueue.Add(new EnsureAllCellsAreRunningUnlessCancelledCommand()), null, Timeout.Infinite, _watchdogIntervalMs);
}
public void RunSync(CancellationToken cancellationToken)
@@ -52,6 +55,7 @@ public void RunSync(CancellationToken cancellationToken)
_currentDeploymentEtag = null;
_autoLoadHeadDeploymentTimer.Change(0, _autoLoadHeadDeploymentIntervalMs);
+ _watchdogTimer.Change(_watchdogIntervalMs, _watchdogIntervalMs);
foreach (var command in _commandQueue.GetConsumingEnumerable(cancellationToken))
{
@@ -61,6 +65,7 @@ public void RunSync(CancellationToken cancellationToken)
finally
{
_autoLoadHeadDeploymentTimer.Change(Timeout.Infinite, _autoLoadHeadDeploymentIntervalMs);
+ _watchdogTimer.Change(Timeout.Infinite, _watchdogIntervalMs);
_hostContext.Observer.TryNotify(() => new HostStoppedEvent(_hostContext.Identity));
}
}
@@ -111,6 +116,11 @@ public void LoadHeadDeployment()
_commandQueue.Add(new LoadCurrentHeadDeploymentCommand());
}
+ public void EnsureAllCellsAreRunningUnlessCancelled() // manual trigger for the same command the watchdog timer enqueues
+ {
+ _commandQueue.Add(new EnsureAllCellsAreRunningUnlessCancelledCommand()); // only enqueues; the check itself runs when the command queue is consumed
+ }
+
void Do(LoadCurrentHeadDeploymentCommand command, CancellationToken cancellationToken)
{
if (cancellationToken.IsCancellationRequested)
@@ -165,6 +175,19 @@ void Do(LoadDeploymentCommand command, CancellationToken cancellationToken)
}
}
+ void Do(EnsureAllCellsAreRunningUnlessCancelledCommand command, CancellationToken cancellationToken) // handles the watchdog command
+ {
+ if (cancellationToken.IsCancellationRequested)
+ {
+ return; // cancellation requested: skip the check so no cell gets restarted
+ }
+
+ foreach (var cell in _cells)
+ {
+ cell.Value.EnsureIsRunningUnlessCancelled(); // restarts the cell if its thread is dead and it was not cancelled
+ }
+ }
+
void OnDeploymentChanged(SolutionHead newDeployment, SolutionDefinition newSolution, CancellationToken cancellationToken)
{
// 0. ANALYZE CELL LAYOUT CHANGES
@@ -212,7 +235,6 @@ void OnDeploymentChanged(SolutionHead newDeployment, SolutionDefinition newSolut
_cells.Remove(cell.Key);
cell.Value.Cancel();
}
- //Task.WaitAll(removed.Select(c => c.Value.Task).ToArray());
// 3. UPDATE CELLS STILL PRESENT
Please sign in to comment.
Something went wrong with that request. Please try again.